mab2 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/Gemfile +17 -3
- data/Rakefile +4 -6
- data/lib/mab2.rb +3 -2
- data/lib/mab2/document.rb +71 -44
- data/lib/mab2/document/scope.rb +91 -31
- data/lib/mab2/version.rb +1 -1
- data/mab2.gemspec +12 -14
- data/{test/mab_files/dsl_2.xml → spec/assets/mab_files/mab.xml} +6 -14
- data/spec/mab2/document/scope/subfield_spec.rb +34 -0
- data/spec/mab2/document/scope_spec.rb +140 -0
- data/spec/mab2/document_spec.rb +52 -0
- data/spec/mab2_spec.rb +2 -0
- data/spec/spec_helper.rb +30 -0
- metadata +51 -79
- data/lib/mab2/aleph_sequential_adapter.rb +0 -10
- data/lib/mab2/document/datafield.rb +0 -15
- data/lib/mab2/document/subfield.rb +0 -10
- data/lib/mab2/mab_xml_adapter.rb +0 -50
- data/test/mab2/test_controlfield.rb +0 -24
- data/test/mab2/test_datafield.rb +0 -131
- data/test/mab_files/PAD01.000806191.PRIMO.xml +0 -216
- data/test/mab_files/PAD01.000978177.PRIMO.xml +0 -203
- data/test/mab_files/dsl_1.xml +0 -14
- data/test/test_helper.rb +0 -12
- data/test/test_mab2.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91501bcd9e26b7515c4e60e93b552a1b5a698f49
|
4
|
+
data.tar.gz: 8adb6ae2b71b4c392c3c974194f971a648badfae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 554ec9a0c4a6396fd1ba9ed14c4efe8bf4bd3086c4f5a0c6c1955f857b1cdde6f9324f84ddca3ec918b1e66965b182338978912a426f86801f028e163d9d48ef
|
7
|
+
data.tar.gz: 681cb470282c20a3e8488131577dc432eb3fbcfe3c81dfc5d5dab9afd4c16059597e82ba4d484c7e8f6834504cc8a38291b90ec04bd79e912b771935f7b05769
|
data/.rspec
ADDED
data/Gemfile
CHANGED
@@ -1,6 +1,20 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
gem 'badgerfish', path: '~/github/msievers/badgerfish'
|
1
|
+
source "https://rubygems.org"
|
4
2
|
|
5
3
|
# Specify your gem's dependencies in mab2.gemspec
|
6
4
|
gemspec
|
5
|
+
|
6
|
+
if !ENV["CI"]
|
7
|
+
group :development do
|
8
|
+
gem "pry", "~> 0.9.12.6"
|
9
|
+
gem "pry-byebug", "<= 1.3.2"
|
10
|
+
gem "pry-rescue", "~> 1.4.1"
|
11
|
+
gem "pry-stack_explorer", "~> 0.4.9.1"
|
12
|
+
gem "pry-syntax-hacks", "~> 0.0.6"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
group :test do
|
17
|
+
gem "codeclimate-test-reporter", require: nil
|
18
|
+
end
|
19
|
+
|
20
|
+
gem "badgerfish", github: "msievers/badgerfish", branch: :master
|
data/Rakefile
CHANGED
@@ -1,8 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
3
|
|
4
|
-
|
5
|
-
t.pattern = 'test/test_*.rb'
|
6
|
-
end
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
7
5
|
|
8
|
-
task :default => :
|
6
|
+
task :default => :spec
|
data/lib/mab2.rb
CHANGED
data/lib/mab2/document.rb
CHANGED
@@ -1,75 +1,102 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "active_support"
|
2
|
+
require "active_support/core_ext"
|
3
|
+
require "badgerfish"
|
3
4
|
|
4
5
|
class Mab2::Document
|
5
|
-
require_relative
|
6
|
-
require_relative 'document/scope'
|
7
|
-
require_relative 'document/subfield'
|
6
|
+
require_relative "./document/scope"
|
8
7
|
|
9
|
-
attr_accessor :
|
10
|
-
attr_accessor :datafields
|
8
|
+
attr_accessor :legacy_mabmapper_mode
|
11
9
|
|
12
|
-
def initialize(
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
def initialize(xml, options = {})
|
11
|
+
@badgerfish_hash = Badgerfish::Parser.new.load(xml, options)
|
12
|
+
|
13
|
+
# the original mabmapper dsl is buggy at certain points, but this
|
14
|
+
# behaviour might be needed for certain legacy rules
|
15
|
+
@legacy_mabmapper_mode =
|
16
|
+
if options[:legacy_mabmapper_mode].nil?
|
17
|
+
false
|
19
18
|
else
|
20
|
-
|
19
|
+
!!options[:legacy_mabmapper_mode]
|
21
20
|
end
|
22
21
|
|
23
|
-
|
24
|
-
|
22
|
+
create_controlfields_tag_index
|
23
|
+
create_datafields_tag_index
|
25
24
|
end
|
26
25
|
|
27
26
|
#
|
28
|
-
#
|
27
|
+
# dsl methods
|
29
28
|
#
|
30
29
|
def controlfield(tag)
|
31
|
-
tag
|
32
|
-
|
30
|
+
controlfields_by_tag(tag)
|
31
|
+
.first
|
32
|
+
.try do |controlfield|
|
33
|
+
controlfield["$"].chars.map! do |char|
|
34
|
+
char == "|" ? nil : char
|
35
|
+
end
|
36
|
+
end || []
|
37
|
+
end
|
38
|
+
|
39
|
+
def doc
|
40
|
+
self
|
33
41
|
end
|
34
42
|
|
35
|
-
|
36
|
-
|
37
|
-
tag = tag.to_sym # tag should be a symbol
|
38
|
-
indicators.each_pair { |key, val| indicators[key] = val.is_a?(Array) ? val.map(&:to_sym) : [val.to_sym] } # indicators should be hash of symbol arrays
|
39
|
-
Mab2::Document::Scope.new(self).datafield(tag, indicators)
|
43
|
+
def field(tag, options = {})
|
44
|
+
Scope.new(options.merge(document: self, tag: tag))
|
40
45
|
end
|
41
46
|
|
42
47
|
#
|
43
|
-
#
|
48
|
+
# other public methods
|
44
49
|
#
|
45
|
-
def
|
46
|
-
(
|
50
|
+
def controlfields_by_tag(tag)
|
51
|
+
(@controlfields_tag_index[tag] || []).map do |index|
|
52
|
+
controlfields[index]
|
53
|
+
end
|
47
54
|
end
|
48
55
|
|
49
|
-
def
|
50
|
-
|
51
|
-
|
52
|
-
|
56
|
+
def datafields_by_tag(tag)
|
57
|
+
(@datafields_tag_index[tag] || []).map do |index|
|
58
|
+
datafields[index]
|
59
|
+
end
|
60
|
+
end
|
53
61
|
|
54
|
-
|
62
|
+
def legacy_mabmapper_mode?
|
63
|
+
@legacy_mabmapper_mode
|
64
|
+
end
|
55
65
|
|
56
|
-
|
57
|
-
|
58
|
-
|
66
|
+
#
|
67
|
+
private
|
68
|
+
#
|
69
|
+
def create_controlfields_tag_index
|
70
|
+
controlfields.each.with_index.inject(@controlfields_tag_index = {}) do |index, (controlfield, index_within_array)|
|
71
|
+
index.tap do |_index|
|
72
|
+
(_index[controlfield["@tag"]] ||= []).push(index_within_array)
|
59
73
|
end
|
60
74
|
end
|
75
|
+
end
|
61
76
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
query_result.push Mab2::Document::Datafield.new(tag, indicators, subfields)
|
67
|
-
end
|
68
|
-
end
|
77
|
+
def create_datafields_tag_index
|
78
|
+
datafields.each.with_index.inject(@datafields_tag_index = {}) do |index, (datafield, index_within_array)|
|
79
|
+
index.tap do |_index|
|
80
|
+
(_index[datafield["@tag"]] ||= []).push(index_within_array)
|
69
81
|
end
|
70
82
|
end
|
83
|
+
end
|
71
84
|
|
72
|
-
|
85
|
+
def controlfields
|
86
|
+
@controlfields ||=
|
87
|
+
begin
|
88
|
+
@badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["controlfield"].try do |controlfields|
|
89
|
+
controlfields.is_a?(Array) ? controlfields : [controlfields]
|
90
|
+
end || []
|
91
|
+
end
|
73
92
|
end
|
74
93
|
|
94
|
+
def datafields
|
95
|
+
@datafields ||=
|
96
|
+
begin
|
97
|
+
@badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["datafield"].try do |datafields|
|
98
|
+
datafields.is_a?(Array) ? datafields : [datafields]
|
99
|
+
end || []
|
100
|
+
end
|
101
|
+
end
|
75
102
|
end
|
data/lib/mab2/document/scope.rb
CHANGED
@@ -1,56 +1,116 @@
|
|
1
|
-
|
1
|
+
require "active_support"
|
2
|
+
require "active_support/core_ext"
|
3
|
+
require "mab2/document"
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
+
class Mab2::Document::Scope
|
6
|
+
class Subfield < Struct.new(:code, :value)
|
7
|
+
alias_method :name, :code
|
5
8
|
end
|
6
9
|
|
7
|
-
def
|
8
|
-
@document
|
10
|
+
def initialize(options = {})
|
11
|
+
@document = options[:document]
|
12
|
+
@ind1 = [options[:ind1]].flatten(1).compact
|
13
|
+
@ind2 = [options[:ind2]].flatten(1).compact
|
14
|
+
@tag = options[:tag]
|
15
|
+
@subfield = [options[:subfield]].flatten(1).compact
|
16
|
+
|
17
|
+
# negation extraction
|
18
|
+
@ind1.delete_if do |ind1|
|
19
|
+
if ind1.start_with?("-") && ind1.length > 1
|
20
|
+
(@not_ind1 ||= []).push(ind1[1..-1])
|
21
|
+
true
|
22
|
+
end
|
23
|
+
end
|
9
24
|
end
|
10
25
|
|
11
|
-
def
|
12
|
-
@scope_datafields = @document.datafields_by(tag: tag, indicators: indicators)
|
26
|
+
def get
|
13
27
|
self
|
14
28
|
end
|
15
29
|
|
16
|
-
def
|
17
|
-
|
30
|
+
def get_subfield(code)
|
31
|
+
[datafields.first["subfield"]].compact.flatten(1)
|
32
|
+
.try(:find) do |subfield|
|
33
|
+
subfield["@code"] == code
|
34
|
+
end
|
35
|
+
.try do |subfield|
|
36
|
+
Subfield.new(subfield["@code"], subfield["$"])
|
37
|
+
end || Subfield.new
|
38
|
+
end
|
18
39
|
|
19
|
-
|
20
|
-
|
21
|
-
|
40
|
+
def fields
|
41
|
+
datafields.map do |datafield|
|
42
|
+
self.class.new({
|
43
|
+
document: @document,
|
44
|
+
ind1: datafield["@ind1"],
|
45
|
+
ind2: datafield["@ind2"],
|
46
|
+
tag: datafield["@tag"]
|
47
|
+
})
|
48
|
+
end
|
49
|
+
end
|
22
50
|
|
23
|
-
|
51
|
+
def subfield(codes)
|
52
|
+
self.class.new(to_options.merge(subfield: codes))
|
53
|
+
end
|
54
|
+
|
55
|
+
def subfields
|
56
|
+
datafields.map do |datafield|
|
57
|
+
[datafield["subfield"]].compact.flatten(1).try(:map) do |subfield|
|
58
|
+
Subfield.new(subfield["@code"], subfield["$"])
|
59
|
+
end
|
60
|
+
end.compact.flatten(1) || []
|
24
61
|
end
|
25
62
|
|
26
63
|
def value(options = {})
|
27
|
-
|
64
|
+
separator = options[:join_subfields] || " "
|
65
|
+
|
66
|
+
if @document.legacy_mabmapper_mode?
|
67
|
+
values.first.join(separator)
|
68
|
+
else
|
69
|
+
values.map(&:presence).compact.first.try(:join, separator)
|
70
|
+
end
|
28
71
|
end
|
29
72
|
|
30
73
|
def values(options = {})
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
74
|
+
datafields
|
75
|
+
.map do |field|
|
76
|
+
[field["subfield"]].compact.flatten(1)
|
77
|
+
.select do |subfield|
|
78
|
+
@subfield.empty? || @subfield.include?(subfield["@code"])
|
79
|
+
end
|
80
|
+
.map do |subfield|
|
81
|
+
subfield["$"]
|
82
|
+
end
|
83
|
+
.try do |subfield_values|
|
84
|
+
if separator = options[:join_subfields]
|
85
|
+
subfield_values.join(separator)
|
86
|
+
else
|
87
|
+
subfield_values
|
88
|
+
end
|
89
|
+
end
|
38
90
|
end
|
39
91
|
end
|
40
92
|
|
41
93
|
#
|
42
|
-
|
94
|
+
private
|
43
95
|
#
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
96
|
+
def datafields
|
97
|
+
@document.datafields_by_tag(@tag)
|
98
|
+
.dup
|
99
|
+
.tap do |fields|
|
100
|
+
fields.select! do |field|
|
101
|
+
(@ind1.empty? || @ind1.include?(field["@ind1"]) || (@not_ind1.present? && !@not_ind1.include?(field["@ind1"]))) &&
|
102
|
+
(@ind2.empty? || @ind2.include?(field["@ind2"]))
|
103
|
+
end
|
104
|
+
end
|
50
105
|
end
|
51
106
|
|
52
|
-
def
|
53
|
-
|
107
|
+
def to_options
|
108
|
+
{
|
109
|
+
document: @document,
|
110
|
+
ind1: @ind1,
|
111
|
+
ind2: @ind2,
|
112
|
+
tag: @tag,
|
113
|
+
subfield: @subfield
|
114
|
+
}
|
54
115
|
end
|
55
|
-
|
56
116
|
end
|
data/lib/mab2/version.rb
CHANGED
data/mab2.gemspec
CHANGED
@@ -1,29 +1,27 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
lib = File.expand_path(
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
4
|
+
require "mab2/version"
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = "mab2"
|
8
8
|
spec.version = Mab2::VERSION
|
9
9
|
spec.authors = ["Michael Sievers"]
|
10
|
-
spec.
|
11
|
-
spec.description = %q{Simplifies access and handling of MAB documents}
|
10
|
+
spec.description = %q{Simplifies access and handling of Mab documents}
|
12
11
|
spec.summary = %q{Implementation if the mabmapper DSL}
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
12
|
+
spec.homepage = "https://github.com/msievers/mab2"
|
13
|
+
spec.license = "MIT"
|
15
14
|
|
16
15
|
spec.files = `git ls-files`.split($/)
|
17
16
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
18
|
spec.require_paths = ["lib"]
|
20
19
|
|
21
|
-
spec.add_dependency
|
20
|
+
spec.add_dependency "activesupport"
|
21
|
+
spec.add_dependency "badgerfish", ">= 0.2.0"
|
22
22
|
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency 'pry-nav', '~> 0.2.3'
|
28
|
-
spec.add_development_dependency 'rake'
|
23
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
24
|
+
spec.add_development_dependency "rake"
|
25
|
+
spec.add_development_dependency "rspec", ">= 3.0.0"
|
26
|
+
spec.add_development_dependency "simplecov", ">= 0.8.0"
|
29
27
|
end
|
data/{test/mab_files/dsl_2.xml → spec/assets/mab_files/mab.xml}
RENAMED
@@ -4,32 +4,24 @@
|
|
4
4
|
<record>
|
5
5
|
<metadata>
|
6
6
|
<record xmlns="http://www.ddb.de/professionell/mabxml/mabxml-1.xsd">
|
7
|
-
<
|
8
|
-
|
9
|
-
<subfield code="
|
10
|
-
<subfield code="9">(DE-588)4046563-9</subfield>
|
7
|
+
<controlfield tag="050">a|||||||||||||</controlfield>
|
8
|
+
<datafield tag="100" ind1="1" ind2="2">
|
9
|
+
<subfield code="a">Some string</subfield>
|
11
10
|
</datafield>
|
12
|
-
<datafield tag="PSW" ind1="
|
11
|
+
<datafield tag="PSW" ind1="1" ind2="1">
|
13
12
|
<subfield code="s">Politiktheorie</subfield>
|
14
13
|
<subfield code="9">(DE-588)4046563-9</subfield>
|
15
14
|
</datafield>
|
16
|
-
<datafield tag="PSW" ind1="-" ind2="1">
|
17
|
-
<subfield code="s">Politische Theorien</subfield>
|
18
|
-
<subfield code="9">(DE-588)4046563-9</subfield>
|
19
|
-
</datafield>
|
20
15
|
<datafield tag="PSW" ind1="-" ind2="1">
|
21
16
|
<subfield code="s">Politik</subfield>
|
22
17
|
<subfield code="x">Theorie</subfield>
|
23
18
|
<subfield code="9">(DE-588)4046563-9</subfield>
|
24
19
|
</datafield>
|
25
|
-
<datafield tag="PSW" ind1="
|
26
|
-
<subfield code="s">Politiktheorie</subfield>
|
27
|
-
<subfield code="9">(DE-588)4046563-9</subfield>
|
28
|
-
</datafield>
|
29
|
-
<datafield tag="PSW" ind1="-" ind2="1">
|
20
|
+
<datafield tag="PSW" ind1="1" ind2="2">
|
30
21
|
<subfield code="s">Politische Theorien</subfield>
|
31
22
|
<subfield code="9">(DE-588)4046563-9</subfield>
|
32
23
|
</datafield>
|
24
|
+
<controlfield tag="SYS">000978177</controlfield>
|
33
25
|
</record>
|
34
26
|
</metadata>
|
35
27
|
</record>
|
data/spec/mab2/document/scope/subfield_spec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
describe Mab2::Document::Scope::Subfield do
|
2
|
+
let(:code) { "s" }
|
3
|
+
let(:value) { "Politiktheorie" }
|
4
|
+
|
5
|
+
let(:subfield) do
|
6
|
+
described_class.new(code, value)
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "#initialize" do
|
10
|
+
context "if callend with a code and a value" do
|
11
|
+
it "returns an instance of #{described_class}" do
|
12
|
+
expect(subfield).to be_a(described_class)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe "#code" do
|
18
|
+
it "returns the code of the subfield" do
|
19
|
+
expect(subfield.code).to eq(code)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "#name" do
|
24
|
+
it "is an alias for #code" do
|
25
|
+
expect(subfield.name).to eq(subfield.code)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "#value" do
|
30
|
+
it "returns the value of the subfield" do
|
31
|
+
expect(subfield.value).to eq(value)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|