miniharvest 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5487f82a2fd55c10cc459970c1da3b7e79287f37
4
- data.tar.gz: 6a26b3fc11e17f9423bcf0d30ac454718a6593d1
3
+ metadata.gz: 791be58e05aa79cdfdb62221fde70d7fce93bed9
4
+ data.tar.gz: 3373ff3347f32cd77c8799e7d9ac5c94ff699e09
5
5
  SHA512:
6
- metadata.gz: e45e99872470ec95ace48e61f09cee3b9694302d1c857271d6f9a121a51f71fbb0a987f18282d785aff50e6cc70e8d5a394e5e279d358733807856357032be0c
7
- data.tar.gz: 120fe7fd9ab4039c48057b806711f6c5aac845d222d426c18175ac1c5d1045072cafa9a058e63d92f922de896d41da0a8fd691c23edc5d7c75d26ddb58e32b50
6
+ metadata.gz: a1859ee9734e1c3daffbc457e0b3da322bd24dda9e4df8fef89bbccb37b3e72f8aba07f7d1960f0eef4342b78f9025129cbcdc96d5400d3c4784904b491ce7a6
7
+ data.tar.gz: a273abf2e4ad986a458187bfae60102a7b41f4e08f4b0735951dc4f527280846acab4e2faecea92643e6b2452bbda9124f36a710ae01108487d6f9e7fb56e0ee
data/exe/miniharvest CHANGED
@@ -2,10 +2,7 @@
2
2
  require 'miniharvest'
3
3
  require 'nokogiri'
4
4
 
5
- mh = MiniHarvest::MiniHarvest.new
6
- mh.oai_base_uri = ARGV[0]
7
- mh.set = ARGV[1]
8
- mh.from = ARGV[2]
5
+ mh = MiniHarvest::MiniHarvest.new(ARGV[0],ARGV[1],ARGV[2])
9
6
  mh.marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
10
- marc_xml = mh.get_records(mh.initial_request)
7
+ marc_xml = mh.get_records(mh.initial_token)
11
8
  File.write(ARGV[3], marc_xml.to_xml)
data/lib/miniharvest.rb CHANGED
@@ -1,48 +1,73 @@
1
1
  require "miniharvest/version"
2
2
 
3
+
3
4
  module MiniHarvest
4
5
  require 'net/http'
5
6
  require 'nokogiri'
6
- require 'open-uri'
7
+ require 'logger'
7
8
 
8
9
  class MiniHarvest
9
-
10
- attr_accessor :oai_base_uri, :set, :from, :marc_collection
10
+
11
+ attr_accessor :oai_base_uri, :set, :from, :marc_collection, :logger, :initial_res, :initial_token
12
+
13
+ def initialize(oai_base_uri, set, from)
14
+ @oai_base_uri = oai_base_uri
15
+ @set = set
16
+ @from = from
17
+ @marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
18
+ @logger = Logger.new(STDOUT)
11
19
 
12
- def append_to_collection(records)
13
- self.marc_collection.root.add_child(records)
20
+ params = { :verb => 'ListRecords', :set => self.set, :metadataPrefix => 'marc21', :from => self.from }
21
+ @initial_res = oai_request(self.oai_base_uri,params)
22
+ @initial_token = get_resumption_token(@initial_res)
23
+ end
24
+
25
+ def append_to_collection(records)
26
+ self.marc_collection.root.add_child(records)
14
27
  end
15
28
 
16
29
  def oai_request(uri,params)
17
30
  oai_uri = URI(uri)
18
31
  oai_uri.query = URI.encode_www_form(params)
19
- return Net::HTTP.get_response(oai_uri)
20
- end
32
+ @logger.info(oai_uri.query)
33
+
34
+ res = Net::HTTP.get_response(oai_uri)
21
35
 
22
- def process_request(res)
23
36
  doc = Nokogiri::XML(res.body)
24
37
  doc.remove_namespaces!
25
- records = doc.xpath('//record')
26
- append_to_collection(records)
27
-
38
+
39
+ return doc
40
+ end
41
+
42
+ def get_resumption_token(doc)
28
43
  resumption_token = doc.xpath('//resumptionToken')
29
- return resumption_token.text
44
+ if resumption_token.length == 1
45
+ return resumption_token
46
+ else
47
+ return false
48
+ end
30
49
  end
31
-
32
- def initial_request
33
- params = { :verb => 'ListRecords', :set => self.set, :metadataPrefix => 'marc21', :from => self.from }
34
- res = oai_request(self.oai_base_uri,params)
35
- get_records(process_request(res))
50
+
51
+ def process_request(doc)
52
+ token = get_resumption_token(doc)
53
+ if token != false
54
+ records = doc.xpath('//record')
55
+ append_to_collection(records)
56
+ return token
57
+ else
58
+ return false
59
+ end
36
60
  end
37
61
 
38
62
  def get_records(token)
39
- params = { :verb => 'ListRecords', :resumptionToken => token }
40
- res = oai_request(self.oai_base_uri,params)
41
- resumption_token = process_request(res)
42
- if resumption_token != ""
63
+ params = { :verb => 'ListRecords', :resumptionToken => token[0].text }
64
+ doc = oai_request(self.oai_base_uri,params)
65
+ resumption_token = process_request(doc)
66
+
67
+ if resumption_token != false
43
68
  get_records(resumption_token)
44
69
  else
45
- return self.marc_collection
70
+ return self.marc_collection.root
46
71
  end
47
72
  end
48
73
  end
@@ -1,3 +1,3 @@
1
1
  module MiniHarvest
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miniharvest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jamie Little
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-08-01 00:00:00.000000000 Z
11
+ date: 2016-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler