mab2 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1ddffd930eea7cf17107898031a771fc6dad90be
4
- data.tar.gz: b05ecb5a0ac27dfd41c2b6abec9d198a24db5031
3
+ metadata.gz: 91501bcd9e26b7515c4e60e93b552a1b5a698f49
4
+ data.tar.gz: 8adb6ae2b71b4c392c3c974194f971a648badfae
5
5
  SHA512:
6
- metadata.gz: fa6d7aac578e249cd39daf51aba91ccacd0430e9950326521637d659703eac0fc0c9733fe65cbe557db5f8b8b8959c792ca216cd1c7cd7bf3469e4fdc3af9dbf
7
- data.tar.gz: 4037cb77b8a966576f3e8e7cb0b78fb7393a2410d50b67956392848cb6f03e594e315c52ea640b17754fef00f8bd3c858f42a0d5303a9a2e9c46987b9747b3f2
6
+ metadata.gz: 554ec9a0c4a6396fd1ba9ed14c4efe8bf4bd3086c4f5a0c6c1955f857b1cdde6f9324f84ddca3ec918b1e66965b182338978912a426f86801f028e163d9d48ef
7
+ data.tar.gz: 681cb470282c20a3e8488131577dc432eb3fbcfe3c81dfc5d5dab9afd4c16059597e82ba4d484c7e8f6834504cc8a38291b90ec04bd79e912b771935f7b05769
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/Gemfile CHANGED
@@ -1,6 +1,20 @@
1
- source 'https://rubygems.org'
2
-
3
- gem 'badgerfish', path: '~/github/msievers/badgerfish'
1
+ source "https://rubygems.org"
4
2
 
5
3
  # Specify your gem's dependencies in mab2.gemspec
6
4
  gemspec
5
+
6
+ if !ENV["CI"]
7
+ group :development do
8
+ gem "pry", "~> 0.9.12.6"
9
+ gem "pry-byebug", "<= 1.3.2"
10
+ gem "pry-rescue", "~> 1.4.1"
11
+ gem "pry-stack_explorer", "~> 0.4.9.1"
12
+ gem "pry-syntax-hacks", "~> 0.0.6"
13
+ end
14
+ end
15
+
16
+ group :test do
17
+ gem "codeclimate-test-reporter", require: nil
18
+ end
19
+
20
+ gem "badgerfish", github: "msievers/badgerfish", branch: :master
data/Rakefile CHANGED
@@ -1,8 +1,6 @@
1
- require 'bundler/gem_tasks'
2
- require 'rake/testtask'
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
3
 
4
- Rake::TestTask.new do |t|
5
- t.pattern = 'test/test_*.rb'
6
- end
4
+ RSpec::Core::RakeTask.new(:spec)
7
5
 
8
- task :default => :test
6
+ task :default => :spec
@@ -1,4 +1,5 @@
1
+ require "mab2/version"
2
+
1
3
  module Mab2
2
- require 'mab2/document'
3
- require 'mab2/version'
4
+ require_relative "mab2/document"
4
5
  end
@@ -1,75 +1,102 @@
1
- require 'mab2/aleph_sequential_adapter'
2
- require 'mab2/mab_xml_adapter'
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "badgerfish"
3
4
 
4
5
  class Mab2::Document
5
- require_relative 'document/datafield'
6
- require_relative 'document/scope'
7
- require_relative 'document/subfield'
6
+ require_relative "./document/scope"
8
7
 
9
- attr_accessor :controlfields
10
- attr_accessor :datafields
8
+ attr_accessor :legacy_mabmapper_mode
11
9
 
12
- def initialize(input)
13
- mab = if input.is_a?(Mab2::MabXmlAdapter) || input.is_a?(Mab2::AlephSequentialAdapter)
14
- input
15
- elsif input.is_a?(String) && input.starts_with?('<?xml')
16
- Mab2::MabXmlAdapter.new(input)
17
- elsif input.is_a?(String) && input.starts_with?('LEADER')
18
- Mab2::AlephSequentialAdapter.new(input)
10
+ def initialize(xml, options = {})
11
+ @badgerfish_hash = Badgerfish::Parser.new.load(xml, options)
12
+
13
+ # the original mabmapper dsl is buggy at certain points, but this
14
+ # behaviour might be needed for certain legacy rules
15
+ @legacy_mabmapper_mode =
16
+ if options[:legacy_mabmapper_mode].nil?
17
+ false
19
18
  else
20
- raise 'Unknown input format or adapter!'
19
+ !!options[:legacy_mabmapper_mode]
21
20
  end
22
21
 
23
- @controlfields = mab.controlfields
24
- @datafields = mab.datafields
22
+ create_controlfields_tag_index
23
+ create_datafields_tag_index
25
24
  end
26
25
 
27
26
  #
28
- # scope functions
27
+ # dsl methods
29
28
  #
30
29
  def controlfield(tag)
31
- tag = tag.to_sym # tag should be a symbol
32
- Mab2::Document::Scope.new(self).controlfield(tag)
30
+ controlfields_by_tag(tag)
31
+ .first
32
+ .try do |controlfield|
33
+ controlfield["$"].chars.map! do |char|
34
+ char == "|" ? nil : char
35
+ end
36
+ end || []
37
+ end
38
+
39
+ def doc
40
+ self
33
41
  end
34
42
 
35
- # mabmapper uses field instead of datafield
36
- def field(tag, indicators = {})
37
- tag = tag.to_sym # tag should be a symbol
38
- indicators.each_pair { |key, val| indicators[key] = val.is_a?(Array) ? val.map(&:to_sym) : [val.to_sym] } # indicators should be hash of symbol arrays
39
- Mab2::Document::Scope.new(self).datafield(tag, indicators)
43
+ def field(tag, options = {})
44
+ Scope.new(options.merge(document: self, tag: tag))
40
45
  end
41
46
 
42
47
  #
43
- # query functions
48
+ # other public methods
44
49
  #
45
- def controlfields_by(options = {})
46
- (result = @controlfields[options[:tag]]).nil? ? [] : result.chars
50
+ def controlfields_by_tag(tag)
51
+ (@controlfields_tag_index[tag] || []).map do |index|
52
+ controlfields[index]
53
+ end
47
54
  end
48
55
 
49
- def datafields_by(options = {})
50
- indicators = options[:indicators]
51
- tag = options[:tag]
52
- query_result = []
56
+ def datafields_by_tag(tag)
57
+ (@datafields_tag_index[tag] || []).map do |index|
58
+ datafields[index]
59
+ end
60
+ end
53
61
 
54
- return [] if @datafields[tag].nil? # shortcut for 'no datafield with that tag exists'
62
+ def legacy_mabmapper_mode?
63
+ @legacy_mabmapper_mode
64
+ end
55
65
 
56
- matching_datafields = @datafields[tag].select do |indicator_one_key, indicator_one_value|
57
- indicators[:ind1].nil? || (indicators[:ind1].any? { |ind1| ind1.length > 1 && ind1.to_s.starts_with?('-') } ^ indicators[:ind1].include?(indicator_one_key)) && indicator_one_value.any? do |indicator_two_key, indicator_two_value|
58
- indicators[:ind2].nil? || indicators[:ind2].include?(indicator_two_key)
66
+ #
67
+ private
68
+ #
69
+ def create_controlfields_tag_index
70
+ controlfields.each.with_index.inject(@controlfields_tag_index = {}) do |index, (controlfield, index_within_array)|
71
+ index.tap do |_index|
72
+ (_index[controlfield["@tag"]] ||= []).push(index_within_array)
59
73
  end
60
74
  end
75
+ end
61
76
 
62
- if !matching_datafields.nil?
63
- matching_datafields.each_pair do |indicator_one_key, indicator_one_value|
64
- indicator_one_value.each_pair do |indicator_two_key, indicator_two_value|
65
- indicator_two_value.each do |subfields|
66
- query_result.push Mab2::Document::Datafield.new(tag, indicators, subfields)
67
- end
68
- end
77
+ def create_datafields_tag_index
78
+ datafields.each.with_index.inject(@datafields_tag_index = {}) do |index, (datafield, index_within_array)|
79
+ index.tap do |_index|
80
+ (_index[datafield["@tag"]] ||= []).push(index_within_array)
69
81
  end
70
82
  end
83
+ end
71
84
 
72
- return query_result
85
+ def controlfields
86
+ @controlfields ||=
87
+ begin
88
+ @badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["controlfield"].try do |controlfields|
89
+ controlfields.is_a?(Array) ? controlfields : [controlfields]
90
+ end || []
91
+ end
73
92
  end
74
93
 
94
+ def datafields
95
+ @datafields ||=
96
+ begin
97
+ @badgerfish_hash["OAI-PMH"]["ListRecords"]["record"]["metadata"]["record"]["datafield"].try do |datafields|
98
+ datafields.is_a?(Array) ? datafields : [datafields]
99
+ end || []
100
+ end
101
+ end
75
102
  end
@@ -1,56 +1,116 @@
1
- class Mab2::Document::Scope
1
+ require "active_support"
2
+ require "active_support/core_ext"
3
+ require "mab2/document"
2
4
 
3
- def initialize(document)
4
- @document = document
5
+ class Mab2::Document::Scope
6
+ class Subfield < Struct.new(:code, :value)
7
+ alias_method :name, :code
5
8
  end
6
9
 
7
- def controlfield(tag)
8
- @document.controlfields_by(tag: tag)
10
+ def initialize(options = {})
11
+ @document = options[:document]
12
+ @ind1 = [options[:ind1]].flatten(1).compact
13
+ @ind2 = [options[:ind2]].flatten(1).compact
14
+ @tag = options[:tag]
15
+ @subfield = [options[:subfield]].flatten(1).compact
16
+
17
+ # negation extraction
18
+ @ind1.delete_if do |ind1|
19
+ if ind1.start_with?("-") && ind1.length > 1
20
+ (@not_ind1 ||= []).push(ind1[1..-1])
21
+ true
22
+ end
23
+ end
9
24
  end
10
25
 
11
- def datafield(tag, indicators = {})
12
- @scope_datafields = @document.datafields_by(tag: tag, indicators: indicators)
26
+ def get
13
27
  self
14
28
  end
15
29
 
16
- def subfield(subfield_codes)
17
- subfield_codes = subfield_codes.is_a?(Array) ? subfield_codes : [subfield_codes] # subfield_codes should be an array
30
+ def get_subfield(code)
31
+ [datafields.first["subfield"]].compact.flatten(1)
32
+ .try(:find) do |subfield|
33
+ subfield["@code"] == code
34
+ end
35
+ .try do |subfield|
36
+ Subfield.new(subfield["@code"], subfield["$"])
37
+ end || Subfield.new
38
+ end
18
39
 
19
- @scope_subfields = @scope_datafields.map do |datafield|
20
- datafield.subfields.select { |subfield| subfield_codes.include? subfield.name.to_s }
21
- end.flatten! || []
40
+ def fields
41
+ datafields.map do |datafield|
42
+ self.class.new({
43
+ document: @document,
44
+ ind1: datafield["@ind1"],
45
+ ind2: datafield["@ind2"],
46
+ tag: datafield["@tag"]
47
+ })
48
+ end
49
+ end
22
50
 
23
- self
51
+ def subfield(codes)
52
+ self.class.new(to_options.merge(subfield: codes))
53
+ end
54
+
55
+ def subfields
56
+ datafields.map do |datafield|
57
+ [datafield["subfield"]].compact.flatten(1).try(:map) do |subfield|
58
+ Subfield.new(subfield["@code"], subfield["$"])
59
+ end
60
+ end.compact.flatten(1) || []
24
61
  end
25
62
 
26
63
  def value(options = {})
27
- values.first
64
+ separator = options[:join_subfields] || " "
65
+
66
+ if @document.legacy_mabmapper_mode?
67
+ values.first.join(separator)
68
+ else
69
+ values.map(&:presence).compact.first.try(:join, separator)
70
+ end
28
71
  end
29
72
 
30
73
  def values(options = {})
31
- unless @scope_values
32
- # if no subfields where specified so far, take all (e.g. doc.datafield('100').values))
33
- @scope_subfields ||= (@scope_datafields.map { |datafield| datafield.subfields }).flatten! || []
34
- subfield_values = @scope_subfields.map { |subfield| subfield.value }
35
- @scope_values = options[:join_subfields] ? subfield_values.join(options[:join_subfields]) : subfield_values
36
- else
37
- @scope_values
74
+ datafields
75
+ .map do |field|
76
+ [field["subfield"]].compact.flatten(1)
77
+ .select do |subfield|
78
+ @subfield.empty? || @subfield.include?(subfield["@code"])
79
+ end
80
+ .map do |subfield|
81
+ subfield["$"]
82
+ end
83
+ .try do |subfield_values|
84
+ if separator = options[:join_subfields]
85
+ subfield_values.join(separator)
86
+ else
87
+ subfield_values
88
+ end
89
+ end
38
90
  end
39
91
  end
40
92
 
41
93
  #
42
- # compatibility
94
+ private
43
95
  #
44
- def get
45
- self
46
- end
47
-
48
- def fields
49
- @scope_datafields
96
+ def datafields
97
+ @document.datafields_by_tag(@tag)
98
+ .dup
99
+ .tap do |fields|
100
+ fields.select! do |field|
101
+ (@ind1.empty? || @ind1.include?(field["@ind1"]) || (@not_ind1.present? && !@not_ind1.include?(field["@ind1"]))) &&
102
+ (@ind2.empty? || @ind2.include?(field["@ind2"]))
103
+ end
104
+ end
50
105
  end
51
106
 
52
- def subfields
53
- @scope_subfields
107
+ def to_options
108
+ {
109
+ document: @document,
110
+ ind1: @ind1,
111
+ ind2: @ind2,
112
+ tag: @tag,
113
+ subfield: @subfield
114
+ }
54
115
  end
55
-
56
116
  end
@@ -1,3 +1,3 @@
1
1
  module Mab2
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,29 +1,27 @@
1
1
  # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
2
+ lib = File.expand_path("../lib", __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'mab2/version'
4
+ require "mab2/version"
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = 'mab2'
7
+ spec.name = "mab2"
8
8
  spec.version = Mab2::VERSION
9
9
  spec.authors = ["Michael Sievers"]
10
- spec.email = ["michael_sievers@web.de"]
11
- spec.description = %q{Simplifies access and handling of MAB documents}
10
+ spec.description = %q{Simplifies access and handling of Mab documents}
12
11
  spec.summary = %q{Implementation if the mabmapper DSL}
13
- spec.homepage = 'https://github.com/msievers/mab2'
14
- spec.license = 'MIT'
12
+ spec.homepage = "https://github.com/msievers/mab2"
13
+ spec.license = "MIT"
15
14
 
16
15
  spec.files = `git ls-files`.split($/)
17
16
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
18
  spec.require_paths = ["lib"]
20
19
 
21
- spec.add_dependency 'nokogiri', '~> 1.6.0'
20
+ spec.add_dependency "activesupport"
21
+ spec.add_dependency "badgerfish", ">= 0.2.0"
22
22
 
23
- spec.add_development_dependency 'activesupport', '>= 3.2.0', '< 4.0.0'
24
- spec.add_development_dependency 'bundler', '~> 1.3'
25
- spec.add_development_dependency 'minitest', '~> 5.0.6'
26
- spec.add_development_dependency 'pry', '~> 0.9.12.2'
27
- spec.add_development_dependency 'pry-nav', '~> 0.2.3'
28
- spec.add_development_dependency 'rake'
23
+ spec.add_development_dependency "bundler", "~> 1.7"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec", ">= 3.0.0"
26
+ spec.add_development_dependency "simplecov", ">= 0.8.0"
29
27
  end
@@ -4,32 +4,24 @@
4
4
  <record>
5
5
  <metadata>
6
6
  <record xmlns="http://www.ddb.de/professionell/mabxml/mabxml-1.xsd">
7
- <datafield tag="PSW" ind1="-" ind2="1">
8
- <subfield code="s">Politik</subfield>
9
- <subfield code="x">Theorie</subfield>
10
- <subfield code="9">(DE-588)4046563-9</subfield>
7
+ <controlfield tag="050">a|||||||||||||</controlfield>
8
+ <datafield tag="100" ind1="1" ind2="2">
9
+ <subfield code="a">Some string</subfield>
11
10
  </datafield>
12
- <datafield tag="PSW" ind1="-" ind2="1">
11
+ <datafield tag="PSW" ind1="1" ind2="1">
13
12
  <subfield code="s">Politiktheorie</subfield>
14
13
  <subfield code="9">(DE-588)4046563-9</subfield>
15
14
  </datafield>
16
- <datafield tag="PSW" ind1="-" ind2="1">
17
- <subfield code="s">Politische Theorien</subfield>
18
- <subfield code="9">(DE-588)4046563-9</subfield>
19
- </datafield>
20
15
  <datafield tag="PSW" ind1="-" ind2="1">
21
16
  <subfield code="s">Politik</subfield>
22
17
  <subfield code="x">Theorie</subfield>
23
18
  <subfield code="9">(DE-588)4046563-9</subfield>
24
19
  </datafield>
25
- <datafield tag="PSW" ind1="-" ind2="1">
26
- <subfield code="s">Politiktheorie</subfield>
27
- <subfield code="9">(DE-588)4046563-9</subfield>
28
- </datafield>
29
- <datafield tag="PSW" ind1="-" ind2="1">
20
+ <datafield tag="PSW" ind1="1" ind2="2">
30
21
  <subfield code="s">Politische Theorien</subfield>
31
22
  <subfield code="9">(DE-588)4046563-9</subfield>
32
23
  </datafield>
24
+ <controlfield tag="SYS">000978177</controlfield>
33
25
  </record>
34
26
  </metadata>
35
27
  </record>
@@ -0,0 +1,34 @@
1
+ describe Mab2::Document::Scope::Subfield do
2
+ let(:code) { "s" }
3
+ let(:value) { "Politiktheorie" }
4
+
5
+ let(:subfield) do
6
+ described_class.new(code, value)
7
+ end
8
+
9
+ describe "#initialize" do
10
+ context "if callend with a code and a value" do
11
+ it "returns an instance of #{described_class}" do
12
+ expect(subfield).to be_a(described_class)
13
+ end
14
+ end
15
+ end
16
+
17
+ describe "#code" do
18
+ it "returns the code of the subfield" do
19
+ expect(subfield.code).to eq(code)
20
+ end
21
+ end
22
+
23
+ describe "#name" do
24
+ it "is an alias for #code" do
25
+ expect(subfield.name).to eq(subfield.code)
26
+ end
27
+ end
28
+
29
+ describe "#value" do
30
+ it "returns the value of the subfield" do
31
+ expect(subfield.value).to eq(value)
32
+ end
33
+ end
34
+ end