mab2 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1ddffd930eea7cf17107898031a771fc6dad90be
4
- data.tar.gz: b05ecb5a0ac27dfd41c2b6abec9d198a24db5031
3
+ metadata.gz: 91501bcd9e26b7515c4e60e93b552a1b5a698f49
4
+ data.tar.gz: 8adb6ae2b71b4c392c3c974194f971a648badfae
5
5
  SHA512:
6
- metadata.gz: fa6d7aac578e249cd39daf51aba91ccacd0430e9950326521637d659703eac0fc0c9733fe65cbe557db5f8b8b8959c792ca216cd1c7cd7bf3469e4fdc3af9dbf
7
- data.tar.gz: 4037cb77b8a966576f3e8e7cb0b78fb7393a2410d50b67956392848cb6f03e594e315c52ea640b17754fef00f8bd3c858f42a0d5303a9a2e9c46987b9747b3f2
6
+ metadata.gz: 554ec9a0c4a6396fd1ba9ed14c4efe8bf4bd3086c4f5a0c6c1955f857b1cdde6f9324f84ddca3ec918b1e66965b182338978912a426f86801f028e163d9d48ef
7
+ data.tar.gz: 681cb470282c20a3e8488131577dc432eb3fbcfe3c81dfc5d5dab9afd4c16059597e82ba4d484c7e8f6834504cc8a38291b90ec04bd79e912b771935f7b05769
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/Gemfile CHANGED
@@ -1,6 +1,20 @@
1
- source 'https://rubygems.org'
2
-
3
- gem 'badgerfish', path: '~/github/msievers/badgerfish'
1
+ source "https://rubygems.org"
4
2
 
5
3
  # Specify your gem's dependencies in mab2.gemspec
6
4
  gemspec
5
+
6
+ if !ENV["CI"]
7
+ group :development do
8
+ gem "pry", "~> 0.9.12.6"
9
+ gem "pry-byebug", "<= 1.3.2"
10
+ gem "pry-rescue", "~> 1.4.1"
11
+ gem "pry-stack_explorer", "~> 0.4.9.1"
12
+ gem "pry-syntax-hacks", "~> 0.0.6"
13
+ end
14
+ end
15
+
16
+ group :test do
17
+ gem "codeclimate-test-reporter", require: nil
18
+ end
19
+
20
+ gem "badgerfish", github: "msievers/badgerfish", branch: :master
data/Rakefile CHANGED
@@ -1,8 +1,6 @@
1
- require 'bundler/gem_tasks'
2
- require 'rake/testtask'
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
3
 
4
- Rake::TestTask.new do |t|
5
- t.pattern = 'test/test_*.rb'
6
- end
4
+ RSpec::Core::RakeTask.new(:spec)
7
5
 
8
- task :default => :test
6
+ task :default => :spec
@@ -1,4 +1,5 @@
1
+ require "mab2/version"
2
+
1
3
  module Mab2
2
- require 'mab2/document'
3
- require 'mab2/version'
4
+ require_relative "mab2/document"
4
5
  end
@@ -1,75 +1,102 @@
1
- require 'mab2/aleph_sequential_adapter'
2
- require 'mab2/mab_xml_adapter'
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "badgerfish"
3
4
 
4
5
  class Mab2::Document
5
- require_relative 'document/datafield'
6
- require_relative 'document/scope'
7
- require_relative 'document/subfield'
6
+ require_relative "./document/scope"
8
7
 
9
- attr_accessor :controlfields
10
- attr_accessor :datafields
8
+ attr_accessor :legacy_mabmapper_mode
11
9
 
12
- def initialize(input)
13
- mab = if input.is_a?(Mab2::MabXmlAdapter) || input.is_a?(Mab2::AlephSequentialAdapter)
14
- input
15
- elsif input.is_a?(String) && input.starts_with?('<?xml')
16
- Mab2::MabXmlAdapter.new(input)
17
- elsif input.is_a?(String) && input.starts_with?('LEADER')
18
- Mab2::AlephSequentialAdapter.new(input)
10
+ def initialize(xml, options = {})
11
+ @badgerfish_hash = Badgerfish::Parser.new.load(xml, options)
12
+
13
+ # the original mabmapper dsl is buggy at certain points, but this
14
+ # behaviour might be needed for certain legacy rules
15
+ @legacy_mabmapper_mode =
16
+ if options[:legacy_mabmapper_mode].nil?
17
+ false
19
18
  else
20
- raise 'Unknown input format or adapter!'
19
+ !!options[:legacy_mabmapper_mode]
21
20
  end
22
21
 
23
- @controlfields = mab.controlfields
24
- @datafields = mab.datafields
22
+ create_controlfields_tag_index
23
+ create_datafields_tag_index
25
24
  end
26
25
 
27
26
  #
28
- # scope functions
27
+ # dsl methods
29
28
  #
30
29
  def controlfield(tag)
31
- tag = tag.to_sym # tag should be a symbol
32
- Mab2::Document::Scope.new(self).controlfield(tag)
30
+ controlfields_by_tag(tag)
31
+ .first
32
+ .try do |controlfield|
33
+ controlfield["$"].chars.map! do |char|
34
+ char == "|" ? nil : char
35
+ end
36
+ end || []
37
+ end
38
+
39
+ def doc
40
+ self
33
41
  end
34
42
 
35
- # mabmapper uses field instead of datafield
36
- def field(tag, indicators = {})
37
- tag = tag.to_sym # tag should be a symbol
38
- indicators.each_pair { |key, val| indicators[key] = val.is_a?(Array) ? val.map(&:to_sym) : [val.to_sym] } # indicators should be hash of symbol arrays
39
- Mab2::Document::Scope.new(self).datafield(tag, indicators)
43
+ def field(tag, options = {})
44
+ Scope.new(options.merge(document: self, tag: tag))
40
45
  end
41
46
 
42
47
  #
43
- # query functions
48
+ # other public methods
44
49
  #
45
- def controlfields_by(options = {})
46
- (result = @controlfields[options[:tag]]).nil? ? [] : result.chars
50
+ def controlfields_by_tag(tag)
51
+ (@controlfields_tag_index[tag] || []).map do |index|
52
+ controlfields[index]
53
+ end
47
54
  end
48
55
 
49
- def datafields_by(options = {})
50
- indicators = options[:indicators]
51
- tag = options[:tag]
52
- query_result = []
56
+ def datafields_by_tag(tag)
57
+ (@datafields_tag_index[tag] || []).map do |index|
58
+ datafields[index]
59
+ end
60
+ end
53
61
 
54
- return [] if @datafields[tag].nil? # shortcut for 'no datafield with that tag exists'
62
+ def legacy_mabmapper_mode?
63
+ @legacy_mabmapper_mode
64
+ end
55
65
 
56
- matching_datafields = @datafields[tag].select do |indicator_one_key, indicator_one_value|
57
- indicators[:ind1].nil? || (indicators[:ind1].any? { |ind1| ind1.length > 1 && ind1.to_s.starts_with?('-') } ^ indicators[:ind1].include?(indicator_one_key)) && indicator_one_value.any? do |indicator_two_key, indicator_two_value|
58
- indicators[:ind2].nil? || indicators[:ind2].include?(indicator_two_key)
66
+ #
67
+ private
68
+ #
69
+ def create_controlfields_tag_index
70
+ controlfields.each.with_index.inject(@controlfields_tag_index = {}) do |index, (controlfield, index_within_array)|
71
+ index.tap do |_index|
72
+ (_index[controlfield["@tag"]] ||= []).push(index_within_array)
59
73
  end
60
74
  end
75
+ end
61
76
 
62
- if !matching_datafields.nil?
63
- matching_datafields.each_pair do |indicator_one_key, indicator_one_value|
64
- indicator_one_value.each_pair do |indicator_two_key, indicator_two_value|
65
- indicator_two_value.each do |subfields|
66
- query_result.push Mab2::Document::Datafield.new(tag, indicators, subfields)
67
- end
68
- end
77
+ def create_datafields_tag_index
78
+ datafields.each.with_index.inject(@datafields_tag_index = {}) do |index, (datafield, index_within_array)|
79
+ index.tap do |_index|
80
+ (_index[datafield["@tag"]] ||= []).push(index_within_array)
69
81
  end
70
82
  end
83
+ end
71
84
 
72
- return query_result
85
+ def controlfields
86
+ @controlfields ||=
87
+ begin
88
+ @badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["controlfield"].try do |controlfields|
89
+ controlfields.is_a?(Array) ? controlfields : [controlfields]
90
+ end || []
91
+ end
73
92
  end
74
93
 
94
+ def datafields
95
+ @datafields ||=
96
+ begin
97
+ @badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["datafield"].try do |datafields|
98
+ datafields.is_a?(Array) ? datafields : [datafields]
99
+ end || []
100
+ end
101
+ end
75
102
  end
@@ -1,56 +1,116 @@
1
- class Mab2::Document::Scope
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "mab2/document"
2
4
 
3
- def initialize(document)
4
- @document = document
5
+ class Mab2::Document::Scope
6
+ class Subfield < Struct.new(:code, :value)
7
+ alias_method :name, :code
5
8
  end
6
9
 
7
- def controlfield(tag)
8
- @document.controlfields_by(tag: tag)
10
+ def initialize(options = {})
11
+ @document = options[:document]
12
+ @ind1 = [options[:ind1]].flatten(1).compact
13
+ @ind2 = [options[:ind2]].flatten(1).compact
14
+ @tag = options[:tag]
15
+ @subfield = [options[:subfield]].flatten(1).compact
16
+
17
+ # negation extraction
18
+ @ind1.delete_if do |ind1|
19
+ if ind1.start_with?("-") && ind1.length > 1
20
+ (@not_ind1 ||= []).push(ind1[1..-1])
21
+ true
22
+ end
23
+ end
9
24
  end
10
25
 
11
- def datafield(tag, indicators = {})
12
- @scope_datafields = @document.datafields_by(tag: tag, indicators: indicators)
26
+ def get
13
27
  self
14
28
  end
15
29
 
16
- def subfield(subfield_codes)
17
- subfield_codes = subfield_codes.is_a?(Array) ? subfield_codes : [subfield_codes] # subfield_codes should be an array
30
+ def get_subfield(code)
31
+ [datafields.first["subfield"]].compact.flatten(1)
32
+ .try(:find) do |subfield|
33
+ subfield["@code"] == code
34
+ end
35
+ .try do |subfield|
36
+ Subfield.new(subfield["@code"], subfield["$"])
37
+ end || Subfield.new
38
+ end
18
39
 
19
- @scope_subfields = @scope_datafields.map do |datafield|
20
- datafield.subfields.select { |subfield| subfield_codes.include? subfield.name.to_s }
21
- end.flatten! || []
40
+ def fields
41
+ datafields.map do |datafield|
42
+ self.class.new({
43
+ document: @document,
44
+ ind1: datafield["@ind1"],
45
+ ind2: datafield["@ind2"],
46
+ tag: datafield["@tag"]
47
+ })
48
+ end
49
+ end
22
50
 
23
- self
51
+ def subfield(codes)
52
+ self.class.new(to_options.merge(subfield: codes))
53
+ end
54
+
55
+ def subfields
56
+ datafields.map do |datafield|
57
+ [datafield["subfield"]].compact.flatten(1).try(:map) do |subfield|
58
+ Subfield.new(subfield["@code"], subfield["$"])
59
+ end
60
+ end.compact.flatten(1) || []
24
61
  end
25
62
 
26
63
  def value(options = {})
27
- values.first
64
+ separator = options[:join_subfields] || " "
65
+
66
+ if @document.legacy_mabmapper_mode?
67
+ values.first.join(separator)
68
+ else
69
+ values.map(&:presence).compact.first.try(:join, separator)
70
+ end
28
71
  end
29
72
 
30
73
  def values(options = {})
31
- unless @scope_values
32
- # if no subfields where specified so far, take all (e.g. doc.datafield('100').values))
33
- @scope_subfields ||= (@scope_datafields.map { |datafield| datafield.subfields }).flatten! || []
34
- subfield_values = @scope_subfields.map { |subfield| subfield.value }
35
- @scope_values = options[:join_subfields] ? subfield_values.join(options[:join_subfields]) : subfield_values
36
- else
37
- @scope_values
74
+ datafields
75
+ .map do |field|
76
+ [field["subfield"]].compact.flatten(1)
77
+ .select do |subfield|
78
+ @subfield.empty? || @subfield.include?(subfield["@code"])
79
+ end
80
+ .map do |subfield|
81
+ subfield["$"]
82
+ end
83
+ .try do |subfield_values|
84
+ if separator = options[:join_subfields]
85
+ subfield_values.join(separator)
86
+ else
87
+ subfield_values
88
+ end
89
+ end
38
90
  end
39
91
  end
40
92
 
41
93
  #
42
- # compatibility
94
+ private
43
95
  #
44
- def get
45
- self
46
- end
47
-
48
- def fields
49
- @scope_datafields
96
+ def datafields
97
+ @document.datafields_by_tag(@tag)
98
+ .dup
99
+ .tap do |fields|
100
+ fields.select! do |field|
101
+ (@ind1.empty? || @ind1.include?(field["@ind1"]) || (@not_ind1.present? && !@not_ind1.include?(field["@ind1"]))) &&
102
+ (@ind2.empty? || @ind2.include?(field["@ind2"]))
103
+ end
104
+ end
50
105
  end
51
106
 
52
- def subfields
53
- @scope_subfields
107
+ def to_options
108
+ {
109
+ document: @document,
110
+ ind1: @ind1,
111
+ ind2: @ind2,
112
+ tag: @tag,
113
+ subfield: @subfield
114
+ }
54
115
  end
55
-
56
116
  end
@@ -1,3 +1,3 @@
1
1
  module Mab2
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,29 +1,27 @@
1
1
  # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
2
+ lib = File.expand_path("../lib", __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'mab2/version'
4
+ require "mab2/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = 'mab2'
7
+ spec.name = "mab2"
8
8
  spec.version = Mab2::VERSION
9
9
  spec.authors = ["Michael Sievers"]
10
- spec.email = ["michael_sievers@web.de"]
11
- spec.description = %q{Simplifies access and handling of MAB documents}
10
+ spec.description = %q{Simplifies access and handling of Mab documents}
12
11
  spec.summary = %q{Implementation if the mabmapper DSL}
13
- spec.homepage = 'https://github.com/msievers/mab2'
14
- spec.license = 'MIT'
12
+ spec.homepage = "https://github.com/msievers/mab2"
13
+ spec.license = "MIT"
15
14
 
16
15
  spec.files = `git ls-files`.split($/)
17
16
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
18
  spec.require_paths = ["lib"]
20
19
 
21
- spec.add_dependency 'nokogiri', '~> 1.6.0'
20
+ spec.add_dependency "activesupport"
21
+ spec.add_dependency "badgerfish", ">= 0.2.0"
22
22
 
23
- spec.add_development_dependency 'activesupport', '>= 3.2.0', '< 4.0.0'
24
- spec.add_development_dependency 'bundler', '~> 1.3'
25
- spec.add_development_dependency 'minitest', '~> 5.0.6'
26
- spec.add_development_dependency 'pry', '~> 0.9.12.2'
27
- spec.add_development_dependency 'pry-nav', '~> 0.2.3'
28
- spec.add_development_dependency 'rake'
23
+ spec.add_development_dependency "bundler", "~> 1.7"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec", ">= 3.0.0"
26
+ spec.add_development_dependency "simplecov", ">= 0.8.0"
29
27
  end
@@ -4,32 +4,24 @@
4
4
  <record>
5
5
  <metadata>
6
6
  <record xmlns="http://www.ddb.de/professionell/mabxml/mabxml-1.xsd">
7
- <datafield tag="PSW" ind1="-" ind2="1">
8
- <subfield code="s">Politik</subfield>
9
- <subfield code="x">Theorie</subfield>
10
- <subfield code="9">(DE-588)4046563-9</subfield>
7
+ <controlfield tag="050">a|||||||||||||</controlfield>
8
+ <datafield tag="100" ind1="1" ind2="2">
9
+ <subfield code="a">Some string</subfield>
11
10
  </datafield>
12
- <datafield tag="PSW" ind1="-" ind2="1">
11
+ <datafield tag="PSW" ind1="1" ind2="1">
13
12
  <subfield code="s">Politiktheorie</subfield>
14
13
  <subfield code="9">(DE-588)4046563-9</subfield>
15
14
  </datafield>
16
- <datafield tag="PSW" ind1="-" ind2="1">
17
- <subfield code="s">Politische Theorien</subfield>
18
- <subfield code="9">(DE-588)4046563-9</subfield>
19
- </datafield>
20
15
  <datafield tag="PSW" ind1="-" ind2="1">
21
16
  <subfield code="s">Politik</subfield>
22
17
  <subfield code="x">Theorie</subfield>
23
18
  <subfield code="9">(DE-588)4046563-9</subfield>
24
19
  </datafield>
25
- <datafield tag="PSW" ind1="-" ind2="1">
26
- <subfield code="s">Politiktheorie</subfield>
27
- <subfield code="9">(DE-588)4046563-9</subfield>
28
- </datafield>
29
- <datafield tag="PSW" ind1="-" ind2="1">
20
+ <datafield tag="PSW" ind1="1" ind2="2">
30
21
  <subfield code="s">Politische Theorien</subfield>
31
22
  <subfield code="9">(DE-588)4046563-9</subfield>
32
23
  </datafield>
24
+ <controlfield tag="SYS">000978177</controlfield>
33
25
  </record>
34
26
  </metadata>
35
27
  </record>
@@ -0,0 +1,34 @@
1
+ describe Mab2::Document::Scope::Subfield do
2
+ let(:code) { "s" }
3
+ let(:value) { "Politiktheorie" }
4
+
5
+ let(:subfield) do
6
+ described_class.new(code, value)
7
+ end
8
+
9
+ describe "#initialize" do
10
+ context "if callend with a code and a value" do
11
+ it "returns an instance of #{described_class}" do
12
+ expect(subfield).to be_a(described_class)
13
+ end
14
+ end
15
+ end
16
+
17
+ describe "#code" do
18
+ it "returns the code of the subfield" do
19
+ expect(subfield.code).to eq(code)
20
+ end
21
+ end
22
+
23
+ describe "#name" do
24
+ it "is an alias for #code" do
25
+ expect(subfield.name).to eq(subfield.code)
26
+ end
27
+ end
28
+
29
+ describe "#value" do
30
+ it "returns the value of the subfield" do
31
+ expect(subfield.value).to eq(value)
32
+ end
33
+ end
34
+ end