metacrunch-marcxml 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3634c811258d11102cde40466592986814cb3ecf463985fd4ff139934539fe8b
4
- data.tar.gz: 9b86c52e489af8bd05f698af04526d3c1561d67180fd37db88d797d351481ca9
3
+ metadata.gz: 639f08dad0d34c7b863be2778a8aa961085b1865d44fdf1ecf84d073f9cac1e5
4
+ data.tar.gz: 4d1a9023bafef92c0fb6ebaa0b6904895946dc49b550db53e5b9cf1351e79439
5
5
  SHA512:
6
- metadata.gz: 170cb959fb34cd493867c07e0d6ff3553385ccb7c7cca41beeee1f70208725921ea80f2a3765511fd87d330a1d61b90ab7167cf3a25df65797f6194960f8ec7b
7
- data.tar.gz: ccfd0da9bfc9cb8d66dbf6fb632f0b7949fc0fb539586a9eff1ef2d148446f8d611ddea48346dbbb82052ff794ed6b8704fa5d3ed967a38c4c61f3b7eeabc33e
6
+ metadata.gz: 5d0755a1ccaa39754ba1c1bfded2a1d8d9826498fc671a11f9168c1781e13feee3027779cc9a80b4f2631879926ce18a78a4dad5ca12c08e2b06d30723377022
7
+ data.tar.gz: 0e7fc91bbcca1e2252fe515c53cefdd14782f0fa03b621661861851b9beaa1c5bf89c897e377c936e751e83fd804e1802d9d59e85198482a6be37511a0568bfa
@@ -1,6 +1,5 @@
1
1
  require "active_support"
2
2
  require "active_support/core_ext"
3
- require "htmlentities"
4
3
  require "ox"
5
4
 
6
5
  module Metacrunch
@@ -12,11 +11,10 @@ module Metacrunch
12
11
  #
13
12
  # Convenience method for Metacrunch::Marcxml.parse
14
13
  #
15
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
16
14
  # @see Metacrunch::Marcxml#parse
17
15
  #
18
- def Marcxml(xml)
19
- Metacrunch::Marcxml.parse(xml)
16
+ def Marcxml(xml, collection_mode: false)
17
+ Metacrunch::Marcxml.parse(xml, collection_mode: collection_mode)
20
18
  end
21
19
  end
22
20
 
@@ -27,20 +25,23 @@ module Metacrunch
27
25
  # Parses a MARCXML string into a {Metacrunch::Marcxml::Document}.
28
26
  #
29
27
  # @param xml [String] the MARCXML document as a string
30
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
28
+ # @param collection_mode [true, false] set to `true` if the MARCXML contains more than one record.
29
+ # Default is `false`.
30
+ # @return [Metacrunch::Marcxml::Document, Array<Metacrunch::Marcxml::Document>, nil] the parsed
31
+ # {Metacrunch::Marcxml::Document}, an array of documents if `collection_mode` was `true`
32
+ # or `nil` if the MARCXML did not contain valid data.
31
33
  #
32
- def parse(xml)
33
- Parser.new.parse(xml)
34
+ def parse(xml, collection_mode: false)
35
+ Parser.new.parse(xml, collection_mode: collection_mode)
34
36
  end
35
37
 
36
38
  #
37
- # Convenience method for Metacrunch::Marcxml.parse
39
+ # Convenience method for Metacrunch::Marcxml.parse(xml, collection_mode: false)
38
40
  #
39
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
40
41
  # @see Metacrunch::Marcxml#parse
41
42
  #
42
43
  def [](xml)
43
- self.parse(xml)
44
+ self.parse(xml, collection_mode: false)
44
45
  end
45
46
  end
46
47
  end
@@ -13,6 +13,10 @@ module Metacrunch
13
13
  @datafields_map = {}
14
14
  end
15
15
 
16
+ def empty?
17
+ @controlfields_map.blank? && @datafields_map.blank?
18
+ end
19
+
16
20
  # ------------------------------------------------------------------------------
17
21
  # Control fields
18
22
  # ------------------------------------------------------------------------------
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Controlfield
5
5
 
6
- attr_reader :tag
7
- attr_reader :value
6
+ attr_accessor :tag
7
+ attr_accessor :value
8
8
 
9
- def initialize(tag, value)
9
+ def initialize(tag = nil, value = nil)
10
10
  @tag = tag
11
11
  @value = value
12
12
  end
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Datafield
5
5
 
6
- attr_reader :tag
7
- attr_reader :ind1, :ind2
6
+ attr_accessor :tag
7
+ attr_accessor :ind1, :ind2
8
8
 
9
- def initialize(tag, ind1:nil, ind2:nil)
9
+ def initialize(tag = nil, ind1:nil, ind2:nil)
10
10
  @tag = tag
11
11
  @ind1 = ind1
12
12
  @ind2 = ind2
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Subfield
5
5
 
6
- attr_reader :code
7
- attr_reader :value
6
+ attr_accessor :code
7
+ attr_accessor :value
8
8
 
9
- def initialize(code, value)
9
+ def initialize(code = nil, value = nil)
10
10
  @code = code
11
11
  @value = value
12
12
  end
@@ -1,80 +1,78 @@
1
1
  module Metacrunch
2
2
  module Marcxml
3
3
  class Parser < Ox::Sax
4
+ class ParsingDone < StandardError ; end
4
5
 
5
- def parse(marc_xml)
6
- # initialize state machine
7
- @in_controlfield = @in_datafield = @in_subfield = false
6
+ def parse(marc_xml, collection_mode: false)
7
+ @stack = []
8
+ @documents = []
9
+ @collection_mode = collection_mode
8
10
 
9
- @controlfield = @datafield = @subfield = nil
10
- @document = Document.new
11
- @html_entities_coder = HTMLEntities.new
11
+ begin
12
+ Ox.sax_parse(self, marc_xml, convert_special: true)
13
+ rescue ParsingDone ; end
12
14
 
13
- # convert_special tells ox to convert some html entities already during
14
- # parsing, which minifies the amount of entities we have to decode using
15
- # html_entities_coder in #text.
16
- Ox.sax_parse(self, marc_xml, convert_special: true)
17
-
18
- return @document
15
+ collection_mode ? @documents : @documents.first
19
16
  end
20
17
 
21
18
  def start_element(name)
22
- if name == :subfield
23
- @in_subfield = true
24
- @subfield = {}
25
- elsif name == :datafield
26
- @in_datafield = true
27
- @datafield = {subfields: []}
28
- elsif name == :controlfield
29
- @in_controlfield = true
30
- @controlfield = {}
19
+ @stack << [name, {}]
20
+
21
+ element_name, element_data = @stack[-1]
22
+ parent_name, parent_data = @stack[-2]
23
+
24
+ if element_name == :record
25
+ element_data[:document] = Document.new
26
+ elsif element_name == :controlfield && parent_name == :record
27
+ element_data[:controlfield] = Document::Controlfield.new
28
+ elsif element_name == :datafield && parent_name == :record
29
+ element_data[:datafield] = Document::Datafield.new
30
+ elsif element_name == :subfield && parent_name == :datafield
31
+ element_data[:subfield] = Document::Subfield.new
31
32
  end
32
33
  end
33
34
 
34
35
  def end_element(name)
35
- if @in_subfield
36
- @in_subfield = false
37
-
38
- subfield = Document::Subfield.new(@subfield[:code], @subfield[:value])
39
- @datafield[:subfields] << subfield
40
- elsif @in_datafield
41
- @in_datafield = false
42
-
43
- datafield = Document::Datafield.new(@datafield[:tag], ind1: @datafield[:ind1], ind2: @datafield[:ind2])
44
- @datafield[:subfields].each do |subfield|
45
- datafield.add_subfield(subfield)
46
- end
36
+ element_name, element_data = @stack[-1]
37
+ parent_name, parent_data = @stack[-2]
47
38
 
48
- @document.add_datafield(datafield)
49
- elsif @in_controlfield
50
- @in_controlfield = false
51
-
52
- controlfield = Document::Controlfield.new(@controlfield[:tag], @controlfield[:values])
53
- @document.add_controlfield(controlfield)
39
+ if element_name == :record
40
+ @documents << element_data[:document] unless element_data[:document].empty?
41
+ raise ParsingDone unless @collection_mode
42
+ elsif element_name == :controlfield && parent_name == :record
43
+ parent_data[:document].add_controlfield(element_data[:controlfield])
44
+ elsif element_name == :datafield && parent_name == :record
45
+ parent_data[:document].add_datafield(element_data[:datafield])
46
+ elsif element_name == :subfield && parent_name == :datafield
47
+ parent_data[:datafield].add_subfield(element_data[:subfield])
54
48
  end
49
+
50
+ @stack.pop
55
51
  end
56
52
 
57
53
  def attr(name, value)
58
- if @in_subfield
59
- @subfield[:code] = value if name == :code
60
- elsif @in_datafield
61
- if name == :tag
62
- @datafield[:tag] = value
63
- elsif name == :ind1
64
- @datafield[:ind1] = value
65
- elsif name == :ind2
66
- @datafield[:ind2] = value
67
- end
68
- elsif @in_controlfield
69
- @controlfield[:tag] = value if name == :tag
54
+ element_name, element_data = @stack[-1]
55
+ parent_name, parent_data = @stack[-2]
56
+
57
+ if element_name == :controlfield && parent_name == :record
58
+ element_data[:controlfield].tag = value if name == :tag
59
+ elsif element_name == :datafield && parent_name == :record
60
+ element_data[:datafield].tag = value if name == :tag
61
+ element_data[:datafield].ind1 = value if name == :ind1
62
+ element_data[:datafield].ind2 = value if name == :ind2
63
+ elsif element_name == :subfield && parent_name == :datafield
64
+ element_data[:subfield].code = value if name == :code
70
65
  end
71
66
  end
72
67
 
73
68
  def text(value)
74
- if @in_subfield
75
- @subfield[:value] = value.include?("&") ? @html_entities_coder.decode(value) : value
76
- elsif @in_controlfield
77
- @controlfield[:values] = value
69
+ element_name, element_data = @stack[-1]
70
+ parent_name, parent_data = @stack[-2]
71
+
72
+ if element_name == :controlfield && parent_name == :record
73
+ element_data[:controlfield].value = value
74
+ elsif element_name == :subfield && parent_name == :datafield
75
+ element_data[:subfield].value = value
78
76
  end
79
77
  end
80
78
  end
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Marcxml
3
- VERSION = "3.0.0"
3
+ VERSION = "3.1.0"
4
4
  end
5
5
  end
@@ -18,7 +18,6 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ["lib"]
19
19
 
20
20
  spec.add_dependency "activesupport", ">= 5.1"
21
- spec.add_dependency "htmlentities", ">= 4.3"
22
21
  spec.add_dependency "ox", ">= 2.11"
23
22
  end
24
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-marcxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-16 00:00:00.000000000 Z
11
+ date: 2021-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.1'
27
- - !ruby/object:Gem::Dependency
28
- name: htmlentities
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '4.3'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '4.3'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: ox
43
29
  requirement: !ruby/object:Gem::Requirement