metacrunch-mab2 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c4778eeae582a8416053b470b2bd54827cb7e68a
4
- data.tar.gz: 1e96e0e40130e5d1b91b186f061ea06ad8900c34
3
+ metadata.gz: 1f3f60e61efdc2d800bb156ef2af37b56f005d97
4
+ data.tar.gz: d08b0a6f38b526dbbc29c301c84461b30e7e41f7
5
5
  SHA512:
6
- metadata.gz: 6ad51835a53f4ab698b2c5f822ddee0470a00b775e8881b3aba0becfff6ed0e38b1e231eef89b91e1ef6d6b0e17438a91b8044ddb6fd27b06a4007021451bf45
7
- data.tar.gz: c6a51e069b36d2d9060e12d762e82fbf7c96b9c15f16314075c9621a96de5b9bf6b570752102712e2019a1f3008eab4bbed24171328a78ff5ff443da1d767cd8
6
+ metadata.gz: 8e727ba7003c508232814161f26ffad6b5a496d20376a82bb35e7bc30256535730a6b2e9a74a09e833565654dcb76950d04a76cf9234743be386fbae19dcac5d
7
+ data.tar.gz: 9cd3dfaf0acf24f3a9c581380a07477c7280df0e964e17c9bd7ec53db0b8d3e0db67cee4ca7eed12de2088417056aeb11d7ec22cb510c064c0bc8a0f406787b4
@@ -0,0 +1,45 @@
1
+ module Metacrunch
2
+ module Mab2
3
+ class Document
4
+ class MabXmlParser
5
+
6
+ def parse(mab_xml)
7
+ xml = Nokogiri::XML(mab_xml.try(:strip))
8
+ xml.remove_namespaces!
9
+
10
+ html_entities = HTMLEntities.new
11
+ document = Document.new
12
+
13
+ xml.xpath("//metadata/record").children.each do |node|
14
+ if node.name == "controlfield"
15
+ controlfield = Metacrunch::Mab2::Document::Controlfield.new
16
+ controlfield.tag = node.attributes["tag"].value
17
+ controlfield.values = node.content
18
+ document.add_controlfield(controlfield)
19
+ elsif node.name == "datafield" then
20
+ datafield = Metacrunch::Mab2::Document::Datafield.new
21
+ datafield.tag = node.attributes["tag"].value
22
+ datafield.ind1 = node.attributes["ind1"].try(:value)
23
+ datafield.ind2 = node.attributes["ind2"].try(:value)
24
+
25
+ node.children.each do |subnode|
26
+ if subnode.name == "subfield"
27
+ subfield = Metacrunch::Mab2::Document::Subfield.new
28
+ subfield.code = subnode.attributes["code"].value
29
+ value = subnode.content
30
+ subfield.value = value.include?("&") ? html_entities.decode(value) : value
31
+ datafield.add_subfield(subfield)
32
+ end
33
+ end
34
+
35
+ document.add_datafield(datafield)
36
+ end
37
+ end
38
+
39
+ document
40
+ end
41
+
42
+ end
43
+ end
44
+ end
45
+ end
@@ -1,16 +1,11 @@
1
- require "htmlentities"
2
1
  require "ox"
3
- require_relative "../document"
4
2
 
5
3
  module Metacrunch
6
4
  module Mab2
7
5
  class Document
8
- class AlephMabXmlParser < Ox::Sax
9
- def self.parse(aleph_mab_xml)
10
- new.parse(aleph_mab_xml)
11
- end
6
+ class MabXmlParser < Ox::Sax
12
7
 
13
- def parse(io_or_string)
8
+ def parse(mab_xml)
14
9
  # initialize state machine
15
10
  @in_controlfield = @in_datafield = @in_subfield = false
16
11
 
@@ -18,11 +13,9 @@ module Metacrunch
18
13
  @document = Document.new
19
14
  @html_entities_coder = HTMLEntities.new
20
15
 
21
- io = io_or_string.is_a?(IO) ? io_or_string : StringIO.new(io_or_string)
22
-
23
16
  # convert_special tells ox to convert some html entities already during
24
17
  # parsing, which minifies the amount of entities we have to decode ourself
25
- Ox.sax_parse(self, io, convert_special: true)
18
+ Ox.sax_parse(self, mab_xml, convert_special: true)
26
19
 
27
20
  return @document
28
21
  end
@@ -1,12 +1,17 @@
1
1
  module Metacrunch
2
2
  module Mab2
3
3
  class Document
4
- require_relative "./document/aleph_mab_xml_parser"
5
- require_relative "./document/controlfield"
6
- require_relative "./document/datafield"
7
- require_relative "./document/datafield_set"
8
- require_relative "./document/subfield"
9
- require_relative "./document/subfield_set"
4
+ if RUBY_PLATFORM == "java"
5
+ require_relative "document/jruby/mab_xml_parser"
6
+ else
7
+ require_relative "document/mab_xml_parser"
8
+ end
9
+
10
+ require_relative "document/controlfield"
11
+ require_relative "document/datafield"
12
+ require_relative "document/datafield_set"
13
+ require_relative "document/subfield"
14
+ require_relative "document/subfield_set"
10
15
 
11
16
  # ------------------------------------------------------------------------------
12
17
  # Parsing
@@ -17,7 +22,7 @@ module Metacrunch
17
22
  # @return [Metacrunch::Mab2::Document]
18
23
  #
19
24
  def self.from_aleph_mab_xml(xml)
20
- AlephMabXmlParser.parse(xml)
25
+ MabXmlParser.new.parse(xml)
21
26
  end
22
27
 
23
28
  def initialize
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Mab2
3
- VERSION = "1.2.0"
3
+ VERSION = "1.2.1"
4
4
  end
5
5
  end
@@ -1,8 +1,9 @@
1
1
  require "rubygems"
2
2
  require "active_support"
3
3
  require "active_support/core_ext"
4
- require "ox"
5
4
  require "htmlentities"
5
+ require "nokogiri"
6
+ require "ox" unless RUBY_PLATFORM == "java"
6
7
 
7
8
  module Metacrunch
8
9
  module Mab2
@@ -20,7 +20,14 @@ Gem::Specification.new do |spec|
20
20
  spec.add_dependency "activesupport", "~> 4.2"
21
21
  spec.add_dependency "htmlentities", "~> 4.3"
22
22
  spec.add_dependency "nokogiri", "~> 1.6"
23
- spec.add_dependency "ox", "~> 2.1"
24
23
  spec.add_dependency "self_enumerable", "~> 0.2"
24
+
25
+ if RUBY_PLATFORM != "java"
26
+ spec.add_dependency "ox", "~> 2.1"
27
+ end
28
+
29
+ if RUBY_PLATFORM == "java"
30
+ spec.platform = Gem::Platform::CURRENT
31
+ end
25
32
  end
26
33
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-mab2
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2016-04-22 00:00:00.000000000 Z
13
+ date: 2016-04-25 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activesupport
@@ -55,33 +55,33 @@ dependencies:
55
55
  - !ruby/object:Gem::Version
56
56
  version: '1.6'
57
57
  - !ruby/object:Gem::Dependency
58
- name: ox
58
+ name: self_enumerable
59
59
  requirement: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: '2.1'
63
+ version: '0.2'
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - "~>"
69
69
  - !ruby/object:Gem::Version
70
- version: '2.1'
70
+ version: '0.2'
71
71
  - !ruby/object:Gem::Dependency
72
- name: self_enumerable
72
+ name: ox
73
73
  requirement: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - "~>"
76
76
  - !ruby/object:Gem::Version
77
- version: '0.2'
77
+ version: '2.1'
78
78
  type: :runtime
79
79
  prerelease: false
80
80
  version_requirements: !ruby/object:Gem::Requirement
81
81
  requirements:
82
82
  - - "~>"
83
83
  - !ruby/object:Gem::Version
84
- version: '0.2'
84
+ version: '2.1'
85
85
  description:
86
86
  email: r.sprotte@ub.uni-paderborn.de
87
87
  executables: []
@@ -100,10 +100,11 @@ files:
100
100
  - lib/metacrunch/mab2/builder.rb
101
101
  - lib/metacrunch/mab2/cli.rb
102
102
  - lib/metacrunch/mab2/document.rb
103
- - lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb
104
103
  - lib/metacrunch/mab2/document/controlfield.rb
105
104
  - lib/metacrunch/mab2/document/datafield.rb
106
105
  - lib/metacrunch/mab2/document/datafield_set.rb
106
+ - lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb
107
+ - lib/metacrunch/mab2/document/mab_xml_parser.rb
107
108
  - lib/metacrunch/mab2/document/subfield.rb
108
109
  - lib/metacrunch/mab2/document/subfield_set.rb
109
110
  - lib/metacrunch/mab2/version.rb