metacrunch-marcxml 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3634c811258d11102cde40466592986814cb3ecf463985fd4ff139934539fe8b
4
- data.tar.gz: 9b86c52e489af8bd05f698af04526d3c1561d67180fd37db88d797d351481ca9
3
+ metadata.gz: 639f08dad0d34c7b863be2778a8aa961085b1865d44fdf1ecf84d073f9cac1e5
4
+ data.tar.gz: 4d1a9023bafef92c0fb6ebaa0b6904895946dc49b550db53e5b9cf1351e79439
5
5
  SHA512:
6
- metadata.gz: 170cb959fb34cd493867c07e0d6ff3553385ccb7c7cca41beeee1f70208725921ea80f2a3765511fd87d330a1d61b90ab7167cf3a25df65797f6194960f8ec7b
7
- data.tar.gz: ccfd0da9bfc9cb8d66dbf6fb632f0b7949fc0fb539586a9eff1ef2d148446f8d611ddea48346dbbb82052ff794ed6b8704fa5d3ed967a38c4c61f3b7eeabc33e
6
+ metadata.gz: 5d0755a1ccaa39754ba1c1bfded2a1d8d9826498fc671a11f9168c1781e13feee3027779cc9a80b4f2631879926ce18a78a4dad5ca12c08e2b06d30723377022
7
+ data.tar.gz: 0e7fc91bbcca1e2252fe515c53cefdd14782f0fa03b621661861851b9beaa1c5bf89c897e377c936e751e83fd804e1802d9d59e85198482a6be37511a0568bfa
@@ -1,6 +1,5 @@
1
1
  require "active_support"
2
2
  require "active_support/core_ext"
3
- require "htmlentities"
4
3
  require "ox"
5
4
 
6
5
  module Metacrunch
@@ -12,11 +11,10 @@ module Metacrunch
12
11
  #
13
12
  # Convenience method for Metacrunch::Marcxml.parse
14
13
  #
15
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
16
14
  # @see Metacrunch::Marcxml#parse
17
15
  #
18
- def Marcxml(xml)
19
- Metacrunch::Marcxml.parse(xml)
16
+ def Marcxml(xml, collection_mode: false)
17
+ Metacrunch::Marcxml.parse(xml, collection_mode: collection_mode)
20
18
  end
21
19
  end
22
20
 
@@ -27,20 +25,23 @@ module Metacrunch
27
25
  # Parses a MARCXML string into a {Metacrunch::Marcxml::Document}.
28
26
  #
29
27
  # @param xml [String] the MARCXML document as a string
30
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
28
+ # @param collection_mode [true, false] set to `true` if the MARCXML contains more than one record.
29
+ # Default is `false`.
30
+ # @return [Metacrunch::Marcxml::Document, Array<Metacrunch::Marcxml::Document>, nil] the parsed
31
+ # {Metacrunch::Marcxml::Document}, an array of documents if `collection_mode` was `true`
32
+ # or `nil` if the MARCXML did not contain valid data.
31
33
  #
32
- def parse(xml)
33
- Parser.new.parse(xml)
34
+ def parse(xml, collection_mode: false)
35
+ Parser.new.parse(xml, collection_mode: collection_mode)
34
36
  end
35
37
 
36
38
  #
37
- # Convenience method for Metacrunch::Marcxml.parse
39
+ # Convenience method for Metacrunch::Marcxml.parse(xml, collection_mode: false)
38
40
  #
39
- # @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
40
41
  # @see Metacrunch::Marcxml#parse
41
42
  #
42
43
  def [](xml)
43
- self.parse(xml)
44
+ self.parse(xml, collection_mode: false)
44
45
  end
45
46
  end
46
47
  end
@@ -13,6 +13,10 @@ module Metacrunch
13
13
  @datafields_map = {}
14
14
  end
15
15
 
16
+ def empty?
17
+ @controlfields_map.blank? && @datafields_map.blank?
18
+ end
19
+
16
20
  # ------------------------------------------------------------------------------
17
21
  # Control fields
18
22
  # ------------------------------------------------------------------------------
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Controlfield
5
5
 
6
- attr_reader :tag
7
- attr_reader :value
6
+ attr_accessor :tag
7
+ attr_accessor :value
8
8
 
9
- def initialize(tag, value)
9
+ def initialize(tag = nil, value = nil)
10
10
  @tag = tag
11
11
  @value = value
12
12
  end
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Datafield
5
5
 
6
- attr_reader :tag
7
- attr_reader :ind1, :ind2
6
+ attr_accessor :tag
7
+ attr_accessor :ind1, :ind2
8
8
 
9
- def initialize(tag, ind1:nil, ind2:nil)
9
+ def initialize(tag = nil, ind1:nil, ind2:nil)
10
10
  @tag = tag
11
11
  @ind1 = ind1
12
12
  @ind2 = ind2
@@ -3,10 +3,10 @@ module Metacrunch
3
3
  class Document
4
4
  class Subfield
5
5
 
6
- attr_reader :code
7
- attr_reader :value
6
+ attr_accessor :code
7
+ attr_accessor :value
8
8
 
9
- def initialize(code, value)
9
+ def initialize(code = nil, value = nil)
10
10
  @code = code
11
11
  @value = value
12
12
  end
@@ -1,80 +1,78 @@
1
1
  module Metacrunch
2
2
  module Marcxml
3
3
  class Parser < Ox::Sax
4
+ class ParsingDone < StandardError ; end
4
5
 
5
- def parse(marc_xml)
6
- # initialize state machine
7
- @in_controlfield = @in_datafield = @in_subfield = false
6
+ def parse(marc_xml, collection_mode: false)
7
+ @stack = []
8
+ @documents = []
9
+ @collection_mode = collection_mode
8
10
 
9
- @controlfield = @datafield = @subfield = nil
10
- @document = Document.new
11
- @html_entities_coder = HTMLEntities.new
11
+ begin
12
+ Ox.sax_parse(self, marc_xml, convert_special: true)
13
+ rescue ParsingDone ; end
12
14
 
13
- # convert_special tells ox to convert some html entities already during
14
- # parsing, which minifies the amount of entities we have to decode using
15
- # html_entities_coder in #text.
16
- Ox.sax_parse(self, marc_xml, convert_special: true)
17
-
18
- return @document
15
+ collection_mode ? @documents : @documents.first
19
16
  end
20
17
 
21
18
  def start_element(name)
22
- if name == :subfield
23
- @in_subfield = true
24
- @subfield = {}
25
- elsif name == :datafield
26
- @in_datafield = true
27
- @datafield = {subfields: []}
28
- elsif name == :controlfield
29
- @in_controlfield = true
30
- @controlfield = {}
19
+ @stack << [name, {}]
20
+
21
+ element_name, element_data = @stack[-1]
22
+ parent_name, parent_data = @stack[-2]
23
+
24
+ if element_name == :record
25
+ element_data[:document] = Document.new
26
+ elsif element_name == :controlfield && parent_name == :record
27
+ element_data[:controlfield] = Document::Controlfield.new
28
+ elsif element_name == :datafield && parent_name == :record
29
+ element_data[:datafield] = Document::Datafield.new
30
+ elsif element_name == :subfield && parent_name == :datafield
31
+ element_data[:subfield] = Document::Subfield.new
31
32
  end
32
33
  end
33
34
 
34
35
  def end_element(name)
35
- if @in_subfield
36
- @in_subfield = false
37
-
38
- subfield = Document::Subfield.new(@subfield[:code], @subfield[:value])
39
- @datafield[:subfields] << subfield
40
- elsif @in_datafield
41
- @in_datafield = false
42
-
43
- datafield = Document::Datafield.new(@datafield[:tag], ind1: @datafield[:ind1], ind2: @datafield[:ind2])
44
- @datafield[:subfields].each do |subfield|
45
- datafield.add_subfield(subfield)
46
- end
36
+ element_name, element_data = @stack[-1]
37
+ parent_name, parent_data = @stack[-2]
47
38
 
48
- @document.add_datafield(datafield)
49
- elsif @in_controlfield
50
- @in_controlfield = false
51
-
52
- controlfield = Document::Controlfield.new(@controlfield[:tag], @controlfield[:values])
53
- @document.add_controlfield(controlfield)
39
+ if element_name == :record
40
+ @documents << element_data[:document] unless element_data[:document].empty?
41
+ raise ParsingDone unless @collection_mode
42
+ elsif element_name == :controlfield && parent_name == :record
43
+ parent_data[:document].add_controlfield(element_data[:controlfield])
44
+ elsif element_name == :datafield && parent_name == :record
45
+ parent_data[:document].add_datafield(element_data[:datafield])
46
+ elsif element_name == :subfield && parent_name == :datafield
47
+ parent_data[:datafield].add_subfield(element_data[:subfield])
54
48
  end
49
+
50
+ @stack.pop
55
51
  end
56
52
 
57
53
  def attr(name, value)
58
- if @in_subfield
59
- @subfield[:code] = value if name == :code
60
- elsif @in_datafield
61
- if name == :tag
62
- @datafield[:tag] = value
63
- elsif name == :ind1
64
- @datafield[:ind1] = value
65
- elsif name == :ind2
66
- @datafield[:ind2] = value
67
- end
68
- elsif @in_controlfield
69
- @controlfield[:tag] = value if name == :tag
54
+ element_name, element_data = @stack[-1]
55
+ parent_name, parent_data = @stack[-2]
56
+
57
+ if element_name == :controlfield && parent_name == :record
58
+ element_data[:controlfield].tag = value if name == :tag
59
+ elsif element_name == :datafield && parent_name == :record
60
+ element_data[:datafield].tag = value if name == :tag
61
+ element_data[:datafield].ind1 = value if name == :ind1
62
+ element_data[:datafield].ind2 = value if name == :ind2
63
+ elsif element_name == :subfield && parent_name == :datafield
64
+ element_data[:subfield].code = value if name == :code
70
65
  end
71
66
  end
72
67
 
73
68
  def text(value)
74
- if @in_subfield
75
- @subfield[:value] = value.include?("&") ? @html_entities_coder.decode(value) : value
76
- elsif @in_controlfield
77
- @controlfield[:values] = value
69
+ element_name, element_data = @stack[-1]
70
+ parent_name, parent_data = @stack[-2]
71
+
72
+ if element_name == :controlfield && parent_name == :record
73
+ element_data[:controlfield].value = value
74
+ elsif element_name == :subfield && parent_name == :datafield
75
+ element_data[:subfield].value = value
78
76
  end
79
77
  end
80
78
  end
@@ -1,5 +1,5 @@
1
1
  module Metacrunch
2
2
  module Marcxml
3
- VERSION = "3.0.0"
3
+ VERSION = "3.1.0"
4
4
  end
5
5
  end
@@ -18,7 +18,6 @@ Gem::Specification.new do |spec|
18
18
  spec.require_paths = ["lib"]
19
19
 
20
20
  spec.add_dependency "activesupport", ">= 5.1"
21
- spec.add_dependency "htmlentities", ">= 4.3"
22
21
  spec.add_dependency "ox", ">= 2.11"
23
22
  end
24
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metacrunch-marcxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - René Sprotte
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-16 00:00:00.000000000 Z
11
+ date: 2021-02-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5.1'
27
- - !ruby/object:Gem::Dependency
28
- name: htmlentities
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '4.3'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '4.3'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: ox
43
29
  requirement: !ruby/object:Gem::Requirement