metacrunch-mab2 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb +45 -0
- data/lib/metacrunch/mab2/document/{aleph_mab_xml_parser.rb → mab_xml_parser.rb} +3 -10
- data/lib/metacrunch/mab2/document.rb +12 -7
- data/lib/metacrunch/mab2/version.rb +1 -1
- data/lib/metacrunch/mab2.rb +2 -1
- data/metacrunch-mab2.gemspec +8 -1
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f3f60e61efdc2d800bb156ef2af37b56f005d97
|
4
|
+
data.tar.gz: d08b0a6f38b526dbbc29c301c84461b30e7e41f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e727ba7003c508232814161f26ffad6b5a496d20376a82bb35e7bc30256535730a6b2e9a74a09e833565654dcb76950d04a76cf9234743be386fbae19dcac5d
|
7
|
+
data.tar.gz: 9cd3dfaf0acf24f3a9c581380a07477c7280df0e964e17c9bd7ec53db0b8d3e0db67cee4ca7eed12de2088417056aeb11d7ec22cb510c064c0bc8a0f406787b4
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Metacrunch
|
2
|
+
module Mab2
|
3
|
+
class Document
|
4
|
+
class MabXmlParser
|
5
|
+
|
6
|
+
def parse(mab_xml)
|
7
|
+
xml = Nokogiri::XML(mab_xml.try(:strip))
|
8
|
+
xml.remove_namespaces!
|
9
|
+
|
10
|
+
html_entities = HTMLEntities.new
|
11
|
+
document = Document.new
|
12
|
+
|
13
|
+
xml.xpath("//metadata/record").children.each do |node|
|
14
|
+
if node.name == "controlfield"
|
15
|
+
controlfield = Metacrunch::Mab2::Document::Controlfield.new
|
16
|
+
controlfield.tag = node.attributes["tag"].value
|
17
|
+
controlfield.values = node.content
|
18
|
+
document.add_controlfield(controlfield)
|
19
|
+
elsif node.name == "datafield" then
|
20
|
+
datafield = Metacrunch::Mab2::Document::Datafield.new
|
21
|
+
datafield.tag = node.attributes["tag"].value
|
22
|
+
datafield.ind1 = node.attributes["ind1"].try(:value)
|
23
|
+
datafield.ind2 = node.attributes["ind2"].try(:value)
|
24
|
+
|
25
|
+
node.children.each do |subnode|
|
26
|
+
if subnode.name == "subfield"
|
27
|
+
subfield = Metacrunch::Mab2::Document::Subfield.new
|
28
|
+
subfield.code = subnode.attributes["code"].value
|
29
|
+
value = subnode.content
|
30
|
+
subfield.value = value.include?("&") ? html_entities.decode(value) : value
|
31
|
+
datafield.add_subfield(subfield)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
document.add_datafield(datafield)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
document
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -1,16 +1,11 @@
|
|
1
|
-
require "htmlentities"
|
2
1
|
require "ox"
|
3
|
-
require_relative "../document"
|
4
2
|
|
5
3
|
module Metacrunch
|
6
4
|
module Mab2
|
7
5
|
class Document
|
8
|
-
class AlephMabXmlParser < Ox::Sax
|
9
|
-
def self.parse(aleph_mab_xml)
|
10
|
-
new.parse(aleph_mab_xml)
|
11
|
-
end
|
6
|
+
class MabXmlParser < Ox::Sax
|
12
7
|
|
13
|
-
def parse(io_or_string)
|
8
|
+
def parse(mab_xml)
|
14
9
|
# initialize state machine
|
15
10
|
@in_controlfield = @in_datafield = @in_subfield = false
|
16
11
|
|
@@ -18,11 +13,9 @@ module Metacrunch
|
|
18
13
|
@document = Document.new
|
19
14
|
@html_entities_coder = HTMLEntities.new
|
20
15
|
|
21
|
-
io = io_or_string.is_a?(IO) ? io_or_string : StringIO.new(io_or_string)
|
22
|
-
|
23
16
|
# convert_special tells ox to convert some html entities already during
|
24
17
|
# parsing, which minifies the amount of entities we have to decode ourself
|
25
|
-
Ox.sax_parse(self, io, convert_special: true)
|
18
|
+
Ox.sax_parse(self, mab_xml, convert_special: true)
|
26
19
|
|
27
20
|
return @document
|
28
21
|
end
|
@@ -1,12 +1,17 @@
|
|
1
1
|
module Metacrunch
|
2
2
|
module Mab2
|
3
3
|
class Document
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
if RUBY_PLATFORM == "java"
|
5
|
+
require_relative "document/jruby/mab_xml_parser"
|
6
|
+
else
|
7
|
+
require_relative "document/mab_xml_parser"
|
8
|
+
end
|
9
|
+
|
10
|
+
require_relative "document/controlfield"
|
11
|
+
require_relative "document/datafield"
|
12
|
+
require_relative "document/datafield_set"
|
13
|
+
require_relative "document/subfield"
|
14
|
+
require_relative "document/subfield_set"
|
10
15
|
|
11
16
|
# ------------------------------------------------------------------------------
|
12
17
|
# Parsing
|
@@ -17,7 +22,7 @@ module Metacrunch
|
|
17
22
|
# @return [Metacrunch::Mab2::Document]
|
18
23
|
#
|
19
24
|
def self.from_aleph_mab_xml(xml)
|
20
|
-
|
25
|
+
MabXmlParser.new.parse(xml)
|
21
26
|
end
|
22
27
|
|
23
28
|
def initialize
|
data/lib/metacrunch/mab2.rb
CHANGED
data/metacrunch-mab2.gemspec
CHANGED
@@ -20,7 +20,14 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_dependency "activesupport", "~> 4.2"
|
21
21
|
spec.add_dependency "htmlentities", "~> 4.3"
|
22
22
|
spec.add_dependency "nokogiri", "~> 1.6"
|
23
|
-
spec.add_dependency "ox", "~> 2.1"
|
24
23
|
spec.add_dependency "self_enumerable", "~> 0.2"
|
24
|
+
|
25
|
+
if RUBY_PLATFORM != "java"
|
26
|
+
spec.add_dependency "ox", "~> 2.1"
|
27
|
+
end
|
28
|
+
|
29
|
+
if RUBY_PLATFORM == "java"
|
30
|
+
spec.platform = Gem::Platform::CURRENT
|
31
|
+
end
|
25
32
|
end
|
26
33
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-mab2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-04-
|
13
|
+
date: 2016-04-25 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
@@ -55,33 +55,33 @@ dependencies:
|
|
55
55
|
- !ruby/object:Gem::Version
|
56
56
|
version: '1.6'
|
57
57
|
- !ruby/object:Gem::Dependency
|
58
|
-
name: ox
|
58
|
+
name: self_enumerable
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
60
60
|
requirements:
|
61
61
|
- - "~>"
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: '2.1'
|
63
|
+
version: '0.2'
|
64
64
|
type: :runtime
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
68
|
- - "~>"
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '2.1'
|
70
|
+
version: '0.2'
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
|
-
name: self_enumerable
|
72
|
+
name: ox
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - "~>"
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '0.2'
|
77
|
+
version: '2.1'
|
78
78
|
type: :runtime
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
82
|
- - "~>"
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: '0.2'
|
84
|
+
version: '2.1'
|
85
85
|
description:
|
86
86
|
email: r.sprotte@ub.uni-paderborn.de
|
87
87
|
executables: []
|
@@ -100,10 +100,11 @@ files:
|
|
100
100
|
- lib/metacrunch/mab2/builder.rb
|
101
101
|
- lib/metacrunch/mab2/cli.rb
|
102
102
|
- lib/metacrunch/mab2/document.rb
|
103
|
-
- lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb
|
104
103
|
- lib/metacrunch/mab2/document/controlfield.rb
|
105
104
|
- lib/metacrunch/mab2/document/datafield.rb
|
106
105
|
- lib/metacrunch/mab2/document/datafield_set.rb
|
106
|
+
- lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb
|
107
|
+
- lib/metacrunch/mab2/document/mab_xml_parser.rb
|
107
108
|
- lib/metacrunch/mab2/document/subfield.rb
|
108
109
|
- lib/metacrunch/mab2/document/subfield_set.rb
|
109
110
|
- lib/metacrunch/mab2/version.rb
|