miniharvest 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5487f82a2fd55c10cc459970c1da3b7e79287f37
4
- data.tar.gz: 6a26b3fc11e17f9423bcf0d30ac454718a6593d1
3
+ metadata.gz: 791be58e05aa79cdfdb62221fde70d7fce93bed9
4
+ data.tar.gz: 3373ff3347f32cd77c8799e7d9ac5c94ff699e09
5
5
  SHA512:
6
- metadata.gz: e45e99872470ec95ace48e61f09cee3b9694302d1c857271d6f9a121a51f71fbb0a987f18282d785aff50e6cc70e8d5a394e5e279d358733807856357032be0c
7
- data.tar.gz: 120fe7fd9ab4039c48057b806711f6c5aac845d222d426c18175ac1c5d1045072cafa9a058e63d92f922de896d41da0a8fd691c23edc5d7c75d26ddb58e32b50
6
+ metadata.gz: a1859ee9734e1c3daffbc457e0b3da322bd24dda9e4df8fef89bbccb37b3e72f8aba07f7d1960f0eef4342b78f9025129cbcdc96d5400d3c4784904b491ce7a6
7
+ data.tar.gz: a273abf2e4ad986a458187bfae60102a7b41f4e08f4b0735951dc4f527280846acab4e2faecea92643e6b2452bbda9124f36a710ae01108487d6f9e7fb56e0ee
data/exe/miniharvest CHANGED
@@ -2,10 +2,7 @@
2
2
  require 'miniharvest'
3
3
  require 'nokogiri'
4
4
 
5
- mh = MiniHarvest::MiniHarvest.new
6
- mh.oai_base_uri = ARGV[0]
7
- mh.set = ARGV[1]
8
- mh.from = ARGV[2]
5
+ mh = MiniHarvest::MiniHarvest.new(ARGV[0],ARGV[1],ARGV[2])
9
6
  mh.marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
10
- marc_xml = mh.get_records(mh.initial_request)
7
+ marc_xml = mh.get_records(mh.initial_token)
11
8
  File.write(ARGV[3], marc_xml.to_xml)
data/lib/miniharvest.rb CHANGED
@@ -1,48 +1,73 @@
1
1
  require "miniharvest/version"
2
2
 
3
+
3
4
  module MiniHarvest
4
5
  require 'net/http'
5
6
  require 'nokogiri'
6
- require 'open-uri'
7
+ require 'logger'
7
8
 
8
9
  class MiniHarvest
9
-
10
- attr_accessor :oai_base_uri, :set, :from, :marc_collection
10
+
11
+ attr_accessor :oai_base_uri, :set, :from, :marc_collection, :logger, :initial_res, :initial_token
12
+
13
+ def initialize(oai_base_uri, set, from)
14
+ @oai_base_uri = oai_base_uri
15
+ @set = set
16
+ @from = from
17
+ @marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
18
+ @logger = Logger.new(STDOUT)
11
19
 
12
- def append_to_collection(records)
13
- self.marc_collection.root.add_child(records)
20
+ params = { :verb => 'ListRecords', :set => self.set, :metadataPrefix => 'marc21', :from => self.from }
21
+ @initial_res = oai_request(self.oai_base_uri,params)
22
+ @initial_token = get_resumption_token(@initial_res)
23
+ end
24
+
25
+ def append_to_collection(records)
26
+ self.marc_collection.root.add_child(records)
14
27
  end
15
28
 
16
29
  def oai_request(uri,params)
17
30
  oai_uri = URI(uri)
18
31
  oai_uri.query = URI.encode_www_form(params)
19
- return Net::HTTP.get_response(oai_uri)
20
- end
32
+ @logger.info(oai_uri.query)
33
+
34
+ res = Net::HTTP.get_response(oai_uri)
21
35
 
22
- def process_request(res)
23
36
  doc = Nokogiri::XML(res.body)
24
37
  doc.remove_namespaces!
25
- records = doc.xpath('//record')
26
- append_to_collection(records)
27
-
38
+
39
+ return doc
40
+ end
41
+
42
+ def get_resumption_token(doc)
28
43
  resumption_token = doc.xpath('//resumptionToken')
29
- return resumption_token.text
44
+ if resumption_token.length == 1
45
+ return resumption_token
46
+ else
47
+ return false
48
+ end
30
49
  end
31
-
32
- def initial_request
33
- params = { :verb => 'ListRecords', :set => self.set, :metadataPrefix => 'marc21', :from => self.from }
34
- res = oai_request(self.oai_base_uri,params)
35
- get_records(process_request(res))
50
+
51
+ def process_request(doc)
52
+ token = get_resumption_token(doc)
53
+ if token != false
54
+ records = doc.xpath('//record')
55
+ append_to_collection(records)
56
+ return token
57
+ else
58
+ return false
59
+ end
36
60
  end
37
61
 
38
62
  def get_records(token)
39
- params = { :verb => 'ListRecords', :resumptionToken => token }
40
- res = oai_request(self.oai_base_uri,params)
41
- resumption_token = process_request(res)
42
- if resumption_token != ""
63
+ params = { :verb => 'ListRecords', :resumptionToken => token[0].text }
64
+ doc = oai_request(self.oai_base_uri,params)
65
+ resumption_token = process_request(doc)
66
+
67
+ if resumption_token != false
43
68
  get_records(resumption_token)
44
69
  else
45
- return self.marc_collection
70
+ return self.marc_collection.root
46
71
  end
47
72
  end
48
73
  end
@@ -1,3 +1,3 @@
1
1
  module MiniHarvest
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miniharvest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jamie Little
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-08-01 00:00:00.000000000 Z
11
+ date: 2016-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler