metacrunch-marcxml 3.0.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/metacrunch/marcxml.rb +11 -10
- data/lib/metacrunch/marcxml/document.rb +4 -0
- data/lib/metacrunch/marcxml/document/controlfield.rb +3 -3
- data/lib/metacrunch/marcxml/document/datafield.rb +3 -3
- data/lib/metacrunch/marcxml/document/subfield.rb +3 -3
- data/lib/metacrunch/marcxml/parser.rb +53 -55
- data/lib/metacrunch/marcxml/version.rb +1 -1
- data/metacrunch-marcxml.gemspec +0 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 639f08dad0d34c7b863be2778a8aa961085b1865d44fdf1ecf84d073f9cac1e5
|
4
|
+
data.tar.gz: 4d1a9023bafef92c0fb6ebaa0b6904895946dc49b550db53e5b9cf1351e79439
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d0755a1ccaa39754ba1c1bfded2a1d8d9826498fc671a11f9168c1781e13feee3027779cc9a80b4f2631879926ce18a78a4dad5ca12c08e2b06d30723377022
|
7
|
+
data.tar.gz: 0e7fc91bbcca1e2252fe515c53cefdd14782f0fa03b621661861851b9beaa1c5bf89c897e377c936e751e83fd804e1802d9d59e85198482a6be37511a0568bfa
|
data/lib/metacrunch/marcxml.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "active_support"
|
2
2
|
require "active_support/core_ext"
|
3
|
-
require "htmlentities"
|
4
3
|
require "ox"
|
5
4
|
|
6
5
|
module Metacrunch
|
@@ -12,11 +11,10 @@ module Metacrunch
|
|
12
11
|
#
|
13
12
|
# Convenience method for Metacrunch::Marcxml.parse
|
14
13
|
#
|
15
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
16
14
|
# @see Metacrunch::Marcxml#parse
|
17
15
|
#
|
18
|
-
def Marcxml(xml)
|
19
|
-
Metacrunch::Marcxml.parse(xml)
|
16
|
+
def Marcxml(xml, collection_mode: false)
|
17
|
+
Metacrunch::Marcxml.parse(xml, collection_mode: collection_mode)
|
20
18
|
end
|
21
19
|
end
|
22
20
|
|
@@ -27,20 +25,23 @@ module Metacrunch
|
|
27
25
|
# Parses a MARCXML string into a {Metacrunch::Marcxml::Document}.
|
28
26
|
#
|
29
27
|
# @param xml [String] the MARCXML document as a string
|
30
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
28
|
+
# @param collection_mode [true, false] set to `true` if the MARCXML contains more than one record.
|
29
|
+
# Default is `false`.
|
30
|
+
# @return [Metacrunch::Marcxml::Document, Array<Metacrunch::Marcxml::Document>, nil] the parsed
|
31
|
+
# {Metacrunch::Marcxml::Document}, an array of documents if `collection_mode` was `true`
|
32
|
+
# or `nil` if the MARCXML did not contain valid data.
|
31
33
|
#
|
32
|
-
def parse(xml)
|
33
|
-
Parser.new.parse(xml)
|
34
|
+
def parse(xml, collection_mode: false)
|
35
|
+
Parser.new.parse(xml, collection_mode: collection_mode)
|
34
36
|
end
|
35
37
|
|
36
38
|
#
|
37
|
-
# Convenience method for Metacrunch::Marcxml.parse
|
39
|
+
# Convenience method for Metacrunch::Marcxml.parse(xml, collection_mode: false)
|
38
40
|
#
|
39
|
-
# @return [Metacrunch::Marcxml::Document] the parsed {Metacrunch::Marcxml::Document}
|
40
41
|
# @see Metacrunch::Marcxml#parse
|
41
42
|
#
|
42
43
|
def [](xml)
|
43
|
-
self.parse(xml)
|
44
|
+
self.parse(xml, collection_mode: false)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
@@ -13,6 +13,10 @@ module Metacrunch
|
|
13
13
|
@datafields_map = {}
|
14
14
|
end
|
15
15
|
|
16
|
+
def empty?
|
17
|
+
@controlfields_map.blank? && @datafields_map.blank?
|
18
|
+
end
|
19
|
+
|
16
20
|
# ------------------------------------------------------------------------------
|
17
21
|
# Control fields
|
18
22
|
# ------------------------------------------------------------------------------
|
@@ -3,10 +3,10 @@ module Metacrunch
|
|
3
3
|
class Document
|
4
4
|
class Datafield
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_accessor :tag
|
7
|
+
attr_accessor :ind1, :ind2
|
8
8
|
|
9
|
-
def initialize(tag, ind1:nil, ind2:nil)
|
9
|
+
def initialize(tag = nil, ind1:nil, ind2:nil)
|
10
10
|
@tag = tag
|
11
11
|
@ind1 = ind1
|
12
12
|
@ind2 = ind2
|
@@ -3,10 +3,10 @@ module Metacrunch
|
|
3
3
|
class Document
|
4
4
|
class Subfield
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
attr_accessor :code
|
7
|
+
attr_accessor :value
|
8
8
|
|
9
|
-
def initialize(code, value)
|
9
|
+
def initialize(code = nil, value = nil)
|
10
10
|
@code = code
|
11
11
|
@value = value
|
12
12
|
end
|
@@ -1,80 +1,78 @@
|
|
1
1
|
module Metacrunch
|
2
2
|
module Marcxml
|
3
3
|
class Parser < Ox::Sax
|
4
|
+
class ParsingDone < StandardError ; end
|
4
5
|
|
5
|
-
def parse(marc_xml)
|
6
|
-
|
7
|
-
@
|
6
|
+
def parse(marc_xml, collection_mode: false)
|
7
|
+
@stack = []
|
8
|
+
@documents = []
|
9
|
+
@collection_mode = collection_mode
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
begin
|
12
|
+
Ox.sax_parse(self, marc_xml, convert_special: true)
|
13
|
+
rescue ParsingDone ; end
|
12
14
|
|
13
|
-
|
14
|
-
# parsing, which minifies the amount of entities we have to decode using
|
15
|
-
# html_entities_coder in #text.
|
16
|
-
Ox.sax_parse(self, marc_xml, convert_special: true)
|
17
|
-
|
18
|
-
return @document
|
15
|
+
collection_mode ? @documents : @documents.first
|
19
16
|
end
|
20
17
|
|
21
18
|
def start_element(name)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
@stack << [name, {}]
|
20
|
+
|
21
|
+
element_name, element_data = @stack[-1]
|
22
|
+
parent_name, parent_data = @stack[-2]
|
23
|
+
|
24
|
+
if element_name == :record
|
25
|
+
element_data[:document] = Document.new
|
26
|
+
elsif element_name == :controlfield && parent_name == :record
|
27
|
+
element_data[:controlfield] = Document::Controlfield.new
|
28
|
+
elsif element_name == :datafield && parent_name == :record
|
29
|
+
element_data[:datafield] = Document::Datafield.new
|
30
|
+
elsif element_name == :subfield && parent_name == :datafield
|
31
|
+
element_data[:subfield] = Document::Subfield.new
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
34
35
|
def end_element(name)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
subfield = Document::Subfield.new(@subfield[:code], @subfield[:value])
|
39
|
-
@datafield[:subfields] << subfield
|
40
|
-
elsif @in_datafield
|
41
|
-
@in_datafield = false
|
42
|
-
|
43
|
-
datafield = Document::Datafield.new(@datafield[:tag], ind1: @datafield[:ind1], ind2: @datafield[:ind2])
|
44
|
-
@datafield[:subfields].each do |subfield|
|
45
|
-
datafield.add_subfield(subfield)
|
46
|
-
end
|
36
|
+
element_name, element_data = @stack[-1]
|
37
|
+
parent_name, parent_data = @stack[-2]
|
47
38
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
39
|
+
if element_name == :record
|
40
|
+
@documents << element_data[:document] unless element_data[:document].empty?
|
41
|
+
raise ParsingDone unless @collection_mode
|
42
|
+
elsif element_name == :controlfield && parent_name == :record
|
43
|
+
parent_data[:document].add_controlfield(element_data[:controlfield])
|
44
|
+
elsif element_name == :datafield && parent_name == :record
|
45
|
+
parent_data[:document].add_datafield(element_data[:datafield])
|
46
|
+
elsif element_name == :subfield && parent_name == :datafield
|
47
|
+
parent_data[:datafield].add_subfield(element_data[:subfield])
|
54
48
|
end
|
49
|
+
|
50
|
+
@stack.pop
|
55
51
|
end
|
56
52
|
|
57
53
|
def attr(name, value)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
@controlfield[:tag] = value if name == :tag
|
54
|
+
element_name, element_data = @stack[-1]
|
55
|
+
parent_name, parent_data = @stack[-2]
|
56
|
+
|
57
|
+
if element_name == :controlfield && parent_name == :record
|
58
|
+
element_data[:controlfield].tag = value if name == :tag
|
59
|
+
elsif element_name == :datafield && parent_name == :record
|
60
|
+
element_data[:datafield].tag = value if name == :tag
|
61
|
+
element_data[:datafield].ind1 = value if name == :ind1
|
62
|
+
element_data[:datafield].ind2 = value if name == :ind2
|
63
|
+
elsif element_name == :subfield && parent_name == :datafield
|
64
|
+
element_data[:subfield].code = value if name == :code
|
70
65
|
end
|
71
66
|
end
|
72
67
|
|
73
68
|
def text(value)
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
69
|
+
element_name, element_data = @stack[-1]
|
70
|
+
parent_name, parent_data = @stack[-2]
|
71
|
+
|
72
|
+
if element_name == :controlfield && parent_name == :record
|
73
|
+
element_data[:controlfield].value = value
|
74
|
+
elsif element_name == :subfield && parent_name == :datafield
|
75
|
+
element_data[:subfield].value = value
|
78
76
|
end
|
79
77
|
end
|
80
78
|
end
|
data/metacrunch-marcxml.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metacrunch-marcxml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- René Sprotte
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.1'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: htmlentities
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '4.3'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '4.3'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: ox
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|