metacrunch-mab2 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb +45 -0
- data/lib/metacrunch/mab2/document/{aleph_mab_xml_parser.rb → mab_xml_parser.rb} +3 -10
- data/lib/metacrunch/mab2/document.rb +12 -7
- data/lib/metacrunch/mab2/version.rb +1 -1
- data/lib/metacrunch/mab2.rb +2 -1
- data/metacrunch-mab2.gemspec +8 -1
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f3f60e61efdc2d800bb156ef2af37b56f005d97
|
4
|
+
data.tar.gz: d08b0a6f38b526dbbc29c301c84461b30e7e41f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e727ba7003c508232814161f26ffad6b5a496d20376a82bb35e7bc30256535730a6b2e9a74a09e833565654dcb76950d04a76cf9234743be386fbae19dcac5d
|
7
|
+
data.tar.gz: 9cd3dfaf0acf24f3a9c581380a07477c7280df0e964e17c9bd7ec53db0b8d3e0db67cee4ca7eed12de2088417056aeb11d7ec22cb510c064c0bc8a0f406787b4
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Metacrunch
|
2
|
+
module Mab2
|
3
|
+
class Document
|
4
|
+
class MabXmlParser
|
5
|
+
|
6
|
+
def parse(mab_xml)
|
7
|
+
xml = Nokogiri::XML(mab_xml.try(:strip))
|
8
|
+
xml.remove_namespaces!
|
9
|
+
|
10
|
+
html_entities = HTMLEntities.new
|
11
|
+
document = Document.new
|
12
|
+
|
13
|
+
xml.xpath("//metadata/record").children.each do |node|
|
14
|
+
if node.name == "controlfield"
|
15
|
+
controlfield = Metacrunch::Mab2::Document::Controlfield.new
|
16
|
+
controlfield.tag = node.attributes["tag"].value
|
17
|
+
controlfield.values = node.content
|
18
|
+
document.add_controlfield(controlfield)
|
19
|
+
elsif node.name == "datafield" then
|
20
|
+
datafield = Metacrunch::Mab2::Document::Datafield.new
|
21
|
+
datafield.tag = node.attributes["tag"].value
|
22
|
+
datafield.ind1 = node.attributes["ind1"].try(:value)
|
23
|
+
datafield.ind2 = node.attributes["ind2"].try(:value)
|
24
|
+
|
25
|
+
node.children.each do |subnode|
|
26
|
+
if subnode.name == "subfield"
|
27
|
+
subfield = Metacrunch::Mab2::Document::Subfield.new
|
28
|
+
subfield.code = subnode.attributes["code"].value
|
29
|
+
value = subnode.content
|
30
|
+
subfield.value = value.include?("&") ? html_entities.decode(value) : value
|
31
|
+
datafield.add_subfield(subfield)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
document.add_datafield(datafield)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
document
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -1,16 +1,11 @@
|
|
1
|
-
require "htmlentities"
|
2
1
|
require "ox"
|
3
|
-
require_relative "../document"
|
4
2
|
|
5
3
|
module Metacrunch
|
6
4
|
module Mab2
|
7
5
|
class Document
|
8
|
-
class
|
9
|
-
def self.parse(aleph_mab_xml)
|
10
|
-
new.parse(aleph_mab_xml)
|
11
|
-
end
|
6
|
+
class MabXmlParser < Ox::Sax
|
12
7
|
|
13
|
-
def parse(io_or_string)
|
8
|
+
def parse(mab_xml)
|
14
9
|
# initialize state machine
|
15
10
|
@in_controlfield = @in_datafield = @in_subfield = false
|
16
11
|
|
@@ -18,11 +13,9 @@ module Metacrunch
|
|
18
13
|
@document = Document.new
|
19
14
|
@html_entities_coder = HTMLEntities.new
|
20
15
|
|
21
|
-
io = io_or_string.is_a?(IO) ? io_or_string : StringIO.new(io_or_string)
|
22
|
-
|
23
16
|
# convert_special tells ox to convert some html entities already during
|
24
17
|
# parsing, which minifies the amount of entities we have to decode ourself
|
25
|
-
Ox.sax_parse(self, io, convert_special: true)
|
18
|
+
Ox.sax_parse(self, mab_xml, convert_special: true)
|
26
19
|
|
27
20
|
return @document
|
28
21
|
end
|
@@ -1,12 +1,17 @@
|
|
1
1
|
module Metacrunch
|
2
2
|
module Mab2
|
3
3
|
class Document
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
if RUBY_PLATFORM == "java"
|
5
|
+
require_relative "document/jruby/mab_xml_parser"
|
6
|
+
else
|
7
|
+
require_relative "document/mab_xml_parser"
|
8
|
+
end
|
9
|
+
|
10
|
+
require_relative "document/controlfield"
|
11
|
+
require_relative "document/datafield"
|
12
|
+
require_relative "document/datafield_set"
|
13
|
+
require_relative "document/subfield"
|
14
|
+
require_relative "document/subfield_set"
|
10
15
|
|
11
16
|
# ------------------------------------------------------------------------------
|
12
17
|
# Parsing
|
@@ -17,7 +22,7 @@ module Metacrunch
|
|
17
22
|
# @return [Metacrunch::Mab2::Document]
|
18
23
|
#
|
19
24
|
def self.from_aleph_mab_xml(xml)
|
20
|
-
|
25
|
+
MabXmlParser.new.parse(xml)
|
21
26
|
end
|
22
27
|
|
23
28
|
def initialize
|
data/lib/metacrunch/mab2.rb
CHANGED
data/metacrunch-mab2.gemspec
CHANGED
@@ -20,7 +20,14 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_dependency "activesupport", "~> 4.2"
|
21
21
|
spec.add_dependency "htmlentities", "~> 4.3"
|
22
22
|
spec.add_dependency "nokogiri", "~> 1.6"
|
23
|
-
spec.add_dependency "ox", "~> 2.1"
|
24
23
|
spec.add_dependency "self_enumerable", "~> 0.2"
|
24
|
+
|
25
|
+
if RUBY_PLATFORM != "java"
|
26
|
+
spec.add_dependency "ox", "~> 2.1"
|
27
|
+
end
|
28
|
+
|
29
|
+
if RUBY_PLATFORM == "java"
|
30
|
+
spec.platform = Gem::Platform::CURRENT
|
31
|
+
end
|
25
32
|
end
|
26
33
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-mab2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-04-
|
13
|
+
date: 2016-04-25 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activesupport
|
@@ -55,33 +55,33 @@ dependencies:
|
|
55
55
|
- !ruby/object:Gem::Version
|
56
56
|
version: '1.6'
|
57
57
|
- !ruby/object:Gem::Dependency
|
58
|
-
name: ox
|
58
|
+
name: self_enumerable
|
59
59
|
requirement: !ruby/object:Gem::Requirement
|
60
60
|
requirements:
|
61
61
|
- - "~>"
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: '2.1'
|
63
|
+
version: '0.2'
|
64
64
|
type: :runtime
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
68
|
- - "~>"
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: '2.1'
|
70
|
+
version: '0.2'
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
|
-
name: self_enumerable
|
72
|
+
name: ox
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - "~>"
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '0.2'
|
77
|
+
version: '2.1'
|
78
78
|
type: :runtime
|
79
79
|
prerelease: false
|
80
80
|
version_requirements: !ruby/object:Gem::Requirement
|
81
81
|
requirements:
|
82
82
|
- - "~>"
|
83
83
|
- !ruby/object:Gem::Version
|
84
|
-
version: '0.2'
|
84
|
+
version: '2.1'
|
85
85
|
description:
|
86
86
|
email: r.sprotte@ub.uni-paderborn.de
|
87
87
|
executables: []
|
@@ -100,10 +100,11 @@ files:
|
|
100
100
|
- lib/metacrunch/mab2/builder.rb
|
101
101
|
- lib/metacrunch/mab2/cli.rb
|
102
102
|
- lib/metacrunch/mab2/document.rb
|
103
|
-
- lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb
|
104
103
|
- lib/metacrunch/mab2/document/controlfield.rb
|
105
104
|
- lib/metacrunch/mab2/document/datafield.rb
|
106
105
|
- lib/metacrunch/mab2/document/datafield_set.rb
|
106
|
+
- lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb
|
107
|
+
- lib/metacrunch/mab2/document/mab_xml_parser.rb
|
107
108
|
- lib/metacrunch/mab2/document/subfield.rb
|
108
109
|
- lib/metacrunch/mab2/document/subfield_set.rb
|
109
110
|
- lib/metacrunch/mab2/version.rb
|