metacrunch-mab2 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c4778eeae582a8416053b470b2bd54827cb7e68a
4
- data.tar.gz: 1e96e0e40130e5d1b91b186f061ea06ad8900c34
3
+ metadata.gz: 1f3f60e61efdc2d800bb156ef2af37b56f005d97
4
+ data.tar.gz: d08b0a6f38b526dbbc29c301c84461b30e7e41f7
5
5
  SHA512:
6
- metadata.gz: 6ad51835a53f4ab698b2c5f822ddee0470a00b775e8881b3aba0becfff6ed0e38b1e231eef89b91e1ef6d6b0e17438a91b8044ddb6fd27b06a4007021451bf45
7
- data.tar.gz: c6a51e069b36d2d9060e12d762e82fbf7c96b9c15f16314075c9621a96de5b9bf6b570752102712e2019a1f3008eab4bbed24171328a78ff5ff443da1d767cd8
6
+ metadata.gz: 8e727ba7003c508232814161f26ffad6b5a496d20376a82bb35e7bc30256535730a6b2e9a74a09e833565654dcb76950d04a76cf9234743be386fbae19dcac5d
7
+ data.tar.gz: 9cd3dfaf0acf24f3a9c581380a07477c7280df0e964e17c9bd7ec53db0b8d3e0db67cee4ca7eed12de2088417056aeb11d7ec22cb510c064c0bc8a0f406787b4
@@ -0,0 +1,45 @@
1
+ module Metacrunch
2
+ module Mab2
3
+ class Document
4
+ class MabXmlParser
5
+
6
+ def parse(mab_xml)
7
+ xml = Nokogiri::XML(mab_xml.try(:strip))
8
+ xml.remove_namespaces!
9
+
10
+ html_entities = HTMLEntities.new
11
+ document = Document.new
12
+
13
+ xml.xpath("//metadata/record").children.each do |node|
14
+ if node.name == "controlfield"
15
+ controlfield = Metacrunch::Mab2::Document::Controlfield.new
16
+ controlfield.tag = node.attributes["tag"].value
17
+ controlfield.values = node.content
18
+ document.add_controlfield(controlfield)
19
+ elsif node.name == "datafield" then
20
+ datafield = Metacrunch::Mab2::Document::Datafield.new
21
+ datafield.tag = node.attributes["tag"].value
22
+ datafield.ind1 = node.attributes["ind1"].try(:value)
23
+ datafield.ind2 = node.attributes["ind2"].try(:value)
24
+
25
+ node.children.each do |subnode|
26
+ if subnode.name == "subfield"
27
+ subfield = Metacrunch::Mab2::Document::Subfield.new
28
+ subfield.code = subnode.attributes["code"].value
29
+ value = subnode.content
30
+ subfield.value = value.include?("&") ? html_entities.decode(value) : value
31
+ datafield.add_subfield(subfield)
32
+ end
33
+ end
34
+
35
+ document.add_datafield(datafield)
36
+ end
37
+ end
38
+
39
+ document
40
+ end
41
+
42
+ end
43
+ end
44
+ end
45
+ end
@@ -1,16 +1,11 @@
1
- require "htmlentities"
2
1
  require "ox"
3
- require_relative "../document"
4
2
 
5
3
  module Metacrunch
6
4
  module Mab2
7
5
  class Document
8
- class AlephMabXmlParser < Ox::Sax
9
- def self.parse(aleph_mab_xml)
10
- new.parse(aleph_mab_xml)
11
- end
6
+ class MabXmlParser < Ox::Sax
12
7
 
13
- def parse(io_or_string)
8
+ def parse(mab_xml)
14
9
  # initialize state machine
15
10
  @in_controlfield = @in_datafield = @in_subfield = false
16
11
 
@@ -18,11 +13,9 @@ module Metacrunch
18
13
  @document = Document.new
19
14
  @html_entities_coder = HTMLEntities.new
20
15
 
21
- io = io_or_string.is_a?(IO) ? io_or_string : StringIO.new(io_or_string)
22
-
23
16
  # convert_special tells ox to convert some html entities already during
24
17
  # parsing, which minifies the amount of entities we have to decode ourself
25
- Ox.sax_parse(self, io, convert_special: true)
18
+ Ox.sax_parse(self, mab_xml, convert_special: true)
26
19
 
27
20
  return @document
28
21
  end
@@ -1,12 +1,17 @@
1
1
  module Metacrunch
2
2
  module Mab2
3
3
  class Document
4
- require_relative "./document/aleph_mab_xml_parser"
5
- require_relative "./document/controlfield"
6
- require_relative "./document/datafield"
7
- require_relative "./document/datafield_set"
8
- require_relative "./document/subfield"
9
- require_relative "./document/subfield_set"
4
+ if RUBY_PLATFORM == "java"
5
+ require_relative "document/jruby/mab_xml_parser"
6
+ else
7
+ require_relative "document/mab_xml_parser"
8
+ end
9
+
10
+ require_relative "document/controlfield"
11
+ require_relative "document/datafield"
12
+ require_relative "document/datafield_set"
13
+ require_relative "document/subfield"
14
+ require_relative "document/subfield_set"
10
15
 
11
16
  # ------------------------------------------------------------------------------
12
17
  # Parsing
@@ -17,7 +22,7 @@ module Metacrunch
17
22
  # @return [Metacrunch::Mab2::Document]
18
23
  #
19
24
  def self.from_aleph_mab_xml(xml)
20
- AlephMabXmlParser.parse(xml)
25
+ MabXmlParser.new.parse(xml)
21
26
  end
22
27
 
23
28
  def initialize
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Mab2
3
- VERSION = "1.2.0"
3
+ VERSION = "1.2.1"
4
4
  end
5
5
  end
@@ -1,8 +1,9 @@
1
1
  require "rubygems"
2
2
  require "active_support"
3
3
  require "active_support/core_ext"
4
- require "ox"
5
4
  require "htmlentities"
5
+ require "nokogiri"
6
+ require "ox" unless RUBY_PLATFORM == "java"
6
7
 
7
8
  module Metacrunch
8
9
  module Mab2
@@ -20,7 +20,14 @@ Gem::Specification.new do |spec|
20
20
  spec.add_dependency "activesupport", "~> 4.2"
21
21
  spec.add_dependency "htmlentities", "~> 4.3"
22
22
  spec.add_dependency "nokogiri", "~> 1.6"
23
- spec.add_dependency "ox", "~> 2.1"
24
23
  spec.add_dependency "self_enumerable", "~> 0.2"
24
+
25
+ if RUBY_PLATFORM != "java"
26
+ spec.add_dependency "ox", "~> 2.1"
27
+ end
28
+
29
+ if RUBY_PLATFORM == "java"
30
+ spec.platform = Gem::Platform::CURRENT
31
+ end
25
32
  end
26
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-mab2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2016-04-22 00:00:00.000000000 Z
13
+ date: 2016-04-25 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -55,33 +55,33 @@ dependencies:
55
55
  - !ruby/object:Gem::Version
56
56
  version: '1.6'
57
57
  - !ruby/object:Gem::Dependency
58
- name: ox
58
+ name: self_enumerable
59
59
  requirement: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: '2.1'
63
+ version: '0.2'
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - "~>"
69
69
  - !ruby/object:Gem::Version
70
- version: '2.1'
70
+ version: '0.2'
71
71
  - !ruby/object:Gem::Dependency
72
- name: self_enumerable
72
+ name: ox
73
73
  requirement: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - "~>"
76
76
  - !ruby/object:Gem::Version
77
- version: '0.2'
77
+ version: '2.1'
78
78
  type: :runtime
79
79
  prerelease: false
80
80
  version_requirements: !ruby/object:Gem::Requirement
81
81
  requirements:
82
82
  - - "~>"
83
83
  - !ruby/object:Gem::Version
84
- version: '0.2'
84
+ version: '2.1'
85
85
  description:
86
86
  email: r.sprotte@ub.uni-paderborn.de
87
87
  executables: []
@@ -100,10 +100,11 @@ files:
100
100
  - lib/metacrunch/mab2/builder.rb
101
101
  - lib/metacrunch/mab2/cli.rb
102
102
  - lib/metacrunch/mab2/document.rb
103
- - lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb
104
103
  - lib/metacrunch/mab2/document/controlfield.rb
105
104
  - lib/metacrunch/mab2/document/datafield.rb
106
105
  - lib/metacrunch/mab2/document/datafield_set.rb
106
+ - lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb
107
+ - lib/metacrunch/mab2/document/mab_xml_parser.rb
107
108
  - lib/metacrunch/mab2/document/subfield.rb
108
109
  - lib/metacrunch/mab2/document/subfield_set.rb
109
110
  - lib/metacrunch/mab2/version.rb