metacrunch-marcxml 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metacrunch/marcxml.rb +11 -10
- data/lib/metacrunch/marcxml/document.rb +4 -0
- data/lib/metacrunch/marcxml/document/controlfield.rb +3 -3
- data/lib/metacrunch/marcxml/document/datafield.rb +3 -3
- data/lib/metacrunch/marcxml/document/subfield.rb +3 -3
- data/lib/metacrunch/marcxml/parser.rb +53 -55
- data/lib/metacrunch/marcxml/version.rb +1 -1
- data/metacrunch-marcxml.gemspec +0 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 639f08dad0d34c7b863be2778a8aa961085b1865d44fdf1ecf84d073f9cac1e5
|
4
|
+
data.tar.gz: 4d1a9023bafef92c0fb6ebaa0b6904895946dc49b550db53e5b9cf1351e79439
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d0755a1ccaa39754ba1c1bfded2a1d8d9826498fc671a11f9168c1781e13feee3027779cc9a80b4f2631879926ce18a78a4dad5ca12c08e2b06d30723377022
|
7
|
+
data.tar.gz: 0e7fc91bbcca1e2252fe515c53cefdd14782f0fa03b621661861851b9beaa1c5bf89c897e377c936e751e83fd804e1802d9d59e85198482a6be37511a0568bfa
|
data/lib/metacrunch/marcxml.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "active_support"
|
2
2
|
require "active_support/core_ext"
|
3
|
-
require "htmlentities"
|
4
3
|
require "ox"
|
5
4
|
|
6
5
|
module Metacrunch
|
@@ -12,11 +11,10 @@ module Metacrunch
|
|
12
11
|
#
|
13
12
|
# Convenience method for Metacrunch::Marcxml.parse
|
14
13
|
#
|
15
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
16
14
|
# @see Metacrunch::Marcxml#parse
|
17
15
|
#
|
18
|
-
def Marcxml(xml)
|
19
|
-
Metacrunch::Marcxml.parse(xml)
|
16
|
+
def Marcxml(xml, collection_mode: false)
|
17
|
+
Metacrunch::Marcxml.parse(xml, collection_mode: collection_mode)
|
20
18
|
end
|
21
19
|
end
|
22
20
|
|
@@ -27,20 +25,23 @@ module Metacrunch
|
|
27
25
|
# Parses a MARCXML string into a {Metacrunch::Marcxml::Document}.
|
28
26
|
#
|
29
27
|
# @param xml [String] the MARCXML document as a string
|
30
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
28
|
+
# @param collection_mode [true, false] set to `true` if the MARCXML contains more than one record.
|
29
|
+
# Default is `false`.
|
30
|
+
# @return [Metacrunch::Marcxml::Document, Array<Metacrunch::Marcxml::Document>, nil] the parsed
|
31
|
+
# {Metacrunch::Marcxml::Document}, an array of documents if `collection_mode` was `true`
|
32
|
+
# or `nil` if the MARCXML did not contain valid data.
|
31
33
|
#
|
32
|
-
def parse(xml)
|
33
|
-
Parser.new.parse(xml)
|
34
|
+
def parse(xml, collection_mode: false)
|
35
|
+
Parser.new.parse(xml, collection_mode: collection_mode)
|
34
36
|
end
|
35
37
|
|
36
38
|
#
|
37
|
-
# Convenience method for Metacrunch::Marcxml.parse
|
39
|
+
# Convenience method for Metacrunch::Marcxml.parse(xml, collection_mode: false)
|
38
40
|
#
|
39
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
40
41
|
# @see Metacrunch::Marcxml#parse
|
41
42
|
#
|
42
43
|
def [](xml)
|
43
|
-
self.parse(xml)
|
44
|
+
self.parse(xml, collection_mode: false)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
data/lib/metacrunch/marcxml/document.rb
CHANGED
@@ -13,6 +13,10 @@ module Metacrunch
|
|
13
13
|
@datafields_map = {}
|
14
14
|
end
|
15
15
|
|
16
|
+
def empty?
|
17
|
+
@controlfields_map.blank? && @datafields_map.blank?
|
18
|
+
end
|
19
|
+
|
16
20
|
# ------------------------------------------------------------------------------
|
17
21
|
# Control fields
|
18
22
|
# ------------------------------------------------------------------------------
|
data/lib/metacrunch/marcxml/document/datafield.rb
CHANGED
@@ -3,10 +3,10 @@ module Metacrunch
|
|
3
3
|
class Document
|
4
4
|
class Datafield
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_accessor :tag
|
7
|
+
attr_accessor :ind1, :ind2
|
8
8
|
|
9
|
-
def initialize(tag, ind1:nil, ind2:nil)
|
9
|
+
def initialize(tag = nil, ind1:nil, ind2:nil)
|
10
10
|
@tag = tag
|
11
11
|
@ind1 = ind1
|
12
12
|
@ind2 = ind2
|
data/lib/metacrunch/marcxml/document/subfield.rb
CHANGED
@@ -3,10 +3,10 @@ module Metacrunch
|
|
3
3
|
class Document
|
4
4
|
class Subfield
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_accessor :code
|
7
|
+
attr_accessor :value
|
8
8
|
|
9
|
-
def initialize(code, value)
|
9
|
+
def initialize(code = nil, value = nil)
|
10
10
|
@code = code
|
11
11
|
@value = value
|
12
12
|
end
|
data/lib/metacrunch/marcxml/parser.rb
CHANGED
@@ -1,80 +1,78 @@
|
|
1
1
|
module Metacrunch
|
2
2
|
module Marcxml
|
3
3
|
class Parser < Ox::Sax
|
4
|
+
class ParsingDone < StandardError ; end
|
4
5
|
|
5
|
-
def parse(marc_xml)
|
6
|
-
|
7
|
-
@
|
6
|
+
def parse(marc_xml, collection_mode: false)
|
7
|
+
@stack = []
|
8
|
+
@documents = []
|
9
|
+
@collection_mode = collection_mode
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
begin
|
12
|
+
Ox.sax_parse(self, marc_xml, convert_special: true)
|
13
|
+
rescue ParsingDone ; end
|
12
14
|
|
13
|
-
|
14
|
-
# parsing, which minifies the amount of entities we have to decode using
|
15
|
-
# html_entities_coder in #text.
|
16
|
-
Ox.sax_parse(self, marc_xml, convert_special: true)
|
17
|
-
|
18
|
-
return @document
|
15
|
+
collection_mode ? @documents : @documents.first
|
19
16
|
end
|
20
17
|
|
21
18
|
def start_element(name)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
@stack << [name, {}]
|
20
|
+
|
21
|
+
element_name, element_data = @stack[-1]
|
22
|
+
parent_name, parent_data = @stack[-2]
|
23
|
+
|
24
|
+
if element_name == :record
|
25
|
+
element_data[:document] = Document.new
|
26
|
+
elsif element_name == :controlfield && parent_name == :record
|
27
|
+
element_data[:controlfield] = Document::Controlfield.new
|
28
|
+
elsif element_name == :datafield && parent_name == :record
|
29
|
+
element_data[:datafield] = Document::Datafield.new
|
30
|
+
elsif element_name == :subfield && parent_name == :datafield
|
31
|
+
element_data[:subfield] = Document::Subfield.new
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
34
35
|
def end_element(name)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
subfield = Document::Subfield.new(@subfield[:code], @subfield[:value])
|
39
|
-
@datafield[:subfields] << subfield
|
40
|
-
elsif @in_datafield
|
41
|
-
@in_datafield = false
|
42
|
-
|
43
|
-
datafield = Document::Datafield.new(@datafield[:tag], ind1: @datafield[:ind1], ind2: @datafield[:ind2])
|
44
|
-
@datafield[:subfields].each do |subfield|
|
45
|
-
datafield.add_subfield(subfield)
|
46
|
-
end
|
36
|
+
element_name, element_data = @stack[-1]
|
37
|
+
parent_name, parent_data = @stack[-2]
|
47
38
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
39
|
+
if element_name == :record
|
40
|
+
@documents << element_data[:document] unless element_data[:document].empty?
|
41
|
+
raise ParsingDone unless @collection_mode
|
42
|
+
elsif element_name == :controlfield && parent_name == :record
|
43
|
+
parent_data[:document].add_controlfield(element_data[:controlfield])
|
44
|
+
elsif element_name == :datafield && parent_name == :record
|
45
|
+
parent_data[:document].add_datafield(element_data[:datafield])
|
46
|
+
elsif element_name == :subfield && parent_name == :datafield
|
47
|
+
parent_data[:datafield].add_subfield(element_data[:subfield])
|
54
48
|
end
|
49
|
+
|
50
|
+
@stack.pop
|
55
51
|
end
|
56
52
|
|
57
53
|
def attr(name, value)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
@controlfield[:tag] = value if name == :tag
|
54
|
+
element_name, element_data = @stack[-1]
|
55
|
+
parent_name, parent_data = @stack[-2]
|
56
|
+
|
57
|
+
if element_name == :controlfield && parent_name == :record
|
58
|
+
element_data[:controlfield].tag = value if name == :tag
|
59
|
+
elsif element_name == :datafield && parent_name == :record
|
60
|
+
element_data[:datafield].tag = value if name == :tag
|
61
|
+
element_data[:datafield].ind1 = value if name == :ind1
|
62
|
+
element_data[:datafield].ind2 = value if name == :ind2
|
63
|
+
elsif element_name == :subfield && parent_name == :datafield
|
64
|
+
element_data[:subfield].code = value if name == :code
|
70
65
|
end
|
71
66
|
end
|
72
67
|
|
73
68
|
def text(value)
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
69
|
+
element_name, element_data = @stack[-1]
|
70
|
+
parent_name, parent_data = @stack[-2]
|
71
|
+
|
72
|
+
if element_name == :controlfield && parent_name == :record
|
73
|
+
element_data[:controlfield].value = value
|
74
|
+
elsif element_name == :subfield && parent_name == :datafield
|
75
|
+
element_data[:subfield].value = value
|
78
76
|
end
|
79
77
|
end
|
80
78
|
end
|
data/metacrunch-marcxml.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-marcxml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.1'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: htmlentities
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '4.3'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '4.3'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: ox
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|