mab2 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14bac691353f086cfbed41622955c11ca4ff06a2
4
- data.tar.gz: 8277e15586b7f2df280b65081e8def54082b1b34
3
+ metadata.gz: cbecb44d9edd1e649510ee35cc401277ba3cff9d
4
+ data.tar.gz: 9aabd361f1ba9b69727b08435176d8a2e9c37d37
5
5
  SHA512:
6
- metadata.gz: 971bb7293817bf77fb0de53a486a3200b1530c1d07a60ff9279ab1e7c00ecdb36fc990e6e9266afac1487f97787d870ec5e92c40587b34d7a05d66f6155f9e80
7
- data.tar.gz: d4985798177570de52a038c6d53c47cfa628ee8360968a95aa0fe638de732255a4819e0aa6cdc2aefb52d1a4338dceabbd622a3586a8bb6549a1fb547e3e6ee5
6
+ metadata.gz: a3f6292cd094c7a5834cd34e63e5983b9f47de5eef2520f1d47dc8dc4f6da073545052b2b570e7e36892c467dba377694cac7756eb8edbe00437d9e26907b680
7
+ data.tar.gz: e696b3d6612c4f1840db905eed35a0873de66c2c684806e1f5e245b2eac55b436f5ba0c80b86e8d59c0cc89887aea1c64d8bc5682c7a7641e6019283ca5cbd44
data/Gemfile CHANGED
@@ -1,4 +1,6 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ gem 'badgerfish', path: '~/github/msievers/badgerfish'
4
+
3
5
  # Specify your gem's dependencies in mab2.gemspec
4
6
  gemspec
data/lib/mab2/document.rb CHANGED
@@ -1,23 +1,22 @@
1
- require 'badgerfish'
2
1
  require 'mab2/document_scope'
2
+ require 'nokogiri'
3
3
 
4
4
  module Mab2
5
5
  class Document
6
6
 
7
+ attr_accessor :nokogiri_document
8
+
7
9
  # we assume someone calls us like this: Mab2::Document.new('<xml>...</xml>')
8
10
  def initialize(xml)
9
- badgerfish_representation = Badgerfish::Parser.new.load(xml)
10
-
11
- @controlfield = controlfield_from_badgerfish(badgerfish_representation)
12
- @datafield = datafield_from_badgerfish(badgerfish_representation)
11
+ @nokogiri_document = Nokogiri::XML(xml).remove_namespaces!.css('OAI-PMH ListRecords record metadata record')
13
12
  end
14
13
 
15
14
  def controlfield(tag)
16
- @controlfield[tag].chars
15
+ Mab2::DocumentScope.new(self).controlfield(tag)
17
16
  end
18
17
 
19
18
  def datafield(tag, options = {})
20
- Mab2::DocumentScope.new(self).datafield(tag, options)
19
+ Mab2::DocumentScope.new(self).datafield(tag, options)
21
20
  end
22
21
 
23
22
  # mabmapper uses field instead of datafield
@@ -25,45 +24,5 @@ module Mab2
25
24
  datafield(tag, options)
26
25
  end
27
26
 
28
- private
29
-
30
- def controlfield_from_badgerfish(badgerfish)
31
- hash = {}
32
-
33
- badgerfish['OAI-PMH']['ListRecords']['record']['metadata']['record']['controlfield'].each do |controlfield|
34
- hash[controlfield["@tag"]] = controlfield["$"]
35
- end
36
-
37
- hash
38
- end
39
-
40
- def datafield_from_badgerfish(badgerfish)
41
- hash = {}
42
-
43
- badgerfish['OAI-PMH']['ListRecords']['record']['metadata']['record']['datafield'].each do |datafield|
44
- if hash[datafield["@tag"]].nil?
45
- hash[datafield["@tag"]] = { datafield["@ind1"] => { datafield["@ind2"] => {} } }
46
- elsif hash[datafield["@tag"]].is_a?(Hash)
47
- hash[datafield["@tag"]] = [ hash[datafield["@tag"]] ]
48
- end
49
-
50
- if hash[datafield["@tag"]].is_a?(Array)
51
- hash[datafield["@tag"]].push({ datafield["@ind1"] => { datafield["@ind2"] => {} } })
52
- end
53
-
54
- subfields = datafield['subfield'].is_a?(Array) ? datafield['subfield'] : [datafield['subfield']]
55
-
56
- subfields.each do |subfield|
57
- if hash[datafield["@tag"]].is_a?(Array)
58
- hash[datafield["@tag"]].last[datafield["@ind1"]][datafield["@ind2"]][subfield["@code"]] = subfield["$"]
59
- else
60
- hash[datafield["@tag"]][datafield["@ind1"]][datafield["@ind2"]][subfield["@code"]] = subfield["$"]
61
- end
62
- end
63
- end
64
-
65
- hash
66
- end
67
-
68
27
  end
69
28
  end
@@ -1,62 +1,64 @@
1
- require 'active_support/core_ext'
1
+ class Mab2::DocumentScope
2
2
 
3
- module Mab2
4
- class DocumentScope
5
-
6
- attr_accessor :datafield, :document
7
-
8
- def initialize(document)
9
- @document = document
10
- end
3
+ def initialize(scoped_document)
4
+ @doc = scoped_document.nokogiri_document
5
+ end
11
6
 
12
- def datafield(tag, options = {})
13
- @datafield = tag
14
- @ind1 = options[:ind1] unless options[:ind1].nil?
15
- @ind2 = options[:ind2] unless options[:ind2].nil?
16
- self
17
- end
7
+ def controlfield(tag)
8
+ @doc.css("controlfield[tag='#{tag}']").text.chars
9
+ end
18
10
 
19
- def get
20
- self
21
- end
11
+ def datafield(tag, indicators)
12
+ @tag = tag
13
+ @indicators = indicators.inject({}) { |hash, (k, v)| hash[k] = (v.is_a?(Array) ? v : [v]); hash }
14
+ self
15
+ end
22
16
 
23
- def subfield(code)
24
- @subfield = code
25
- self
26
- end
17
+ def get
18
+ self
19
+ end
27
20
 
28
- def value
29
- datafield = @document.instance_variable_get(:@datafield)
30
- ind1 = ind1_accessor
31
- ind2 = ind2_accessor
32
- subfield = subfield_accessor
21
+ def subfield(subfield_codes)
22
+ @subfield_codes = subfield_codes.is_a?(Array) ? subfield_codes : [subfield_codes]
23
+ self
24
+ end
33
25
 
34
- # handle case where ind1/ind2/subfield are arrays -> since we are in .value, simply grab the first non-nil one
35
- ind1 = ind1.select { |indicator| !datafield.try(:[], @datafield).try(:[], indicator).nil? }.first if ind1.is_a?(Array)
36
- ind2 = ind2.select { |indicator| !datafield.try(:[], @datafield).try(:[], ind1).try(:[], indicator).nil? }.first if ind2.is_a?(Array)
37
- subfield = subfield.select { |_subfield| !datafield.try(:[], @datafield).try(:[], ind1).try(:[], ind2).try(:[], _subfield).nil? }.first if subfield.is_a?(Array)
26
+ def value
27
+ values.first
28
+ end
38
29
 
39
- if !(datafield.nil? || ind1.nil? || ind2.nil? || subfield.nil?)
40
- datafield.try(:[], @datafield).try(:[], ind1).try(:[], ind2).try(:[], subfield)
30
+ def values
31
+ datafield_selectors = []
32
+
33
+ if @indicators[:ind1].present? && @indicators[:ind2].blank?
34
+ @indicators[:ind1].each do |ind1_value|
35
+ datafield_selectors.push("datafield[tag='#{@tag}'][ind1='#{ind1_value}']")
41
36
  end
37
+ elsif @indicators[:ind1].blank? && @indicators[:ind2].present?
38
+ @indicators[:ind2].each do |ind2_value|
39
+ datafield_selectors.push("datafield[tag='#{@tag}'][ind2='#{ind2_value}']")
40
+ end
41
+ elsif @indicators[:ind1].present? && @indicators[:ind2].present?
42
+ @indicators[:ind1].each do |ind1_value|
43
+ @indicators[:ind2].each do |ind2_value|
44
+ datafield_selectors.push("datafield[tag='#{@tag}'][ind1='#{ind1_value}'][ind2='#{ind2_value}']")
45
+ end
46
+ end
47
+ else
48
+ datafield_selectors.push("datafield[tag='#{@tag}']")
42
49
  end
43
50
 
44
- def values
45
- end
46
-
47
- protected
48
-
49
- def ind1_accessor
50
- @ind1 || @document.instance_variable_get(:@datafield)[@datafield].keys.first unless @datafield.nil?
51
- end
52
-
53
- def ind2_accessor
54
- @ind2 || @document.instance_variable_get(:@datafield)[@datafield][ind1_accessor].keys.first unless @datafield.nil? && ind1_accessor.nil?
55
- end
56
-
57
- def subfield_accessor
58
- @subfield || @document.instance_variable_get(:@datafield)[@datafield][ind1_accessor][ind2_accessor].keys.first unless @datafield.nil? && ind1_accessor.nil? && ind2_accessor.nil?
59
- end
51
+ css_selector = if @subfield_codes.present?
52
+ datafield_selectors.map do |selector|
53
+ @subfield_codes.map do |subfield_code|
54
+ "#{selector} subfield[code='#{subfield_code}']"
55
+ end
56
+ end
57
+ else
58
+ indicator_selectors
59
+ end.flatten.join(', ')
60
60
 
61
+ @doc.css(css_selector).map(&:text)
61
62
  end
63
+
62
64
  end
data/lib/mab2/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Mab2
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/mab2.gemspec CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency 'badgerfish', '~> 0.0.3'
21
+ spec.add_dependency 'nokogiri', '~> 1.6.0'
22
22
 
23
23
  spec.add_development_dependency 'activesupport', '>= 3.2.0', '< 5.0.0'
24
24
  spec.add_development_dependency 'bundler', '~> 1.3'
@@ -14,7 +14,7 @@ module TestDatafield
14
14
 
15
15
  def test_datafield_3
16
16
  doc = Mab2::Document.new(load_mab('PAD01.000978177.PRIMO.xml'))
17
- w = doc.field('PPE').subfield(['a', 'p']).get.values
17
+ assert_equal ["Bajme, Klaus", "Beyme, Klaus", "Beyme, K. <<von>>", "Bajme, Klaus", "Beyme, Klaus", "Beyme, K. <<von>>"], doc.field('PPE').subfield(['a', 'p']).get.values
18
18
  end
19
19
 
20
20
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mab2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Sievers
@@ -11,19 +11,19 @@ cert_chain: []
11
11
  date: 2013-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: badgerfish
14
+ name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
- version: 0.0.3
19
+ version: 1.6.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ~>
25
25
  - !ruby/object:Gem::Version
26
- version: 0.0.3
26
+ version: 1.6.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement