miniharvest 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/miniharvest +2 -5
- data/lib/miniharvest.rb +47 -22
- data/lib/miniharvest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 791be58e05aa79cdfdb62221fde70d7fce93bed9
|
4
|
+
data.tar.gz: 3373ff3347f32cd77c8799e7d9ac5c94ff699e09
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1859ee9734e1c3daffbc457e0b3da322bd24dda9e4df8fef89bbccb37b3e72f8aba07f7d1960f0eef4342b78f9025129cbcdc96d5400d3c4784904b491ce7a6
|
7
|
+
data.tar.gz: a273abf2e4ad986a458187bfae60102a7b41f4e08f4b0735951dc4f527280846acab4e2faecea92643e6b2452bbda9124f36a710ae01108487d6f9e7fb56e0ee
|
data/exe/miniharvest
CHANGED
@@ -2,10 +2,7 @@
|
|
2
2
|
require 'miniharvest'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
|
-
mh = MiniHarvest::MiniHarvest.new
|
6
|
-
mh.oai_base_uri = ARGV[0]
|
7
|
-
mh.set = ARGV[1]
|
8
|
-
mh.from = ARGV[2]
|
5
|
+
mh = MiniHarvest::MiniHarvest.new(ARGV[0],ARGV[1],ARGV[2])
|
9
6
|
mh.marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
|
10
|
-
marc_xml = mh.get_records(mh.
|
7
|
+
marc_xml = mh.get_records(mh.initial_token)
|
11
8
|
File.write(ARGV[3], marc_xml.to_xml)
|
data/lib/miniharvest.rb
CHANGED
@@ -1,48 +1,73 @@
|
|
1
1
|
require "miniharvest/version"
|
2
2
|
|
3
|
+
|
3
4
|
module MiniHarvest
|
4
5
|
require 'net/http'
|
5
6
|
require 'nokogiri'
|
6
|
-
require '
|
7
|
+
require 'logger'
|
7
8
|
|
8
9
|
class MiniHarvest
|
9
|
-
|
10
|
-
attr_accessor :oai_base_uri, :set, :from, :marc_collection
|
10
|
+
|
11
|
+
attr_accessor :oai_base_uri, :set, :from, :marc_collection, :logger, :initial_res, :initial_token
|
12
|
+
|
13
|
+
def initialize(oai_base_uri, set, from)
|
14
|
+
@oai_base_uri = oai_base_uri
|
15
|
+
@set = set
|
16
|
+
@from = from
|
17
|
+
@marc_collection = Nokogiri::XML('<collection xmlns="http://www.loc.gov/MARC21/slim">')
|
18
|
+
@logger = Logger.new(STDOUT)
|
11
19
|
|
12
|
-
|
13
|
-
|
20
|
+
params = { :verb => 'ListRecords', :set => self.set, :metadataPrefix => 'marc21', :from => self.from }
|
21
|
+
@initial_res = oai_request(self.oai_base_uri,params)
|
22
|
+
@initial_token = get_resumption_token(@initial_res)
|
23
|
+
end
|
24
|
+
|
25
|
+
def append_to_collection(records)
|
26
|
+
self.marc_collection.root.add_child(records)
|
14
27
|
end
|
15
28
|
|
16
29
|
def oai_request(uri,params)
|
17
30
|
oai_uri = URI(uri)
|
18
31
|
oai_uri.query = URI.encode_www_form(params)
|
19
|
-
|
20
|
-
|
32
|
+
@logger.info(oai_uri.query)
|
33
|
+
|
34
|
+
res = Net::HTTP.get_response(oai_uri)
|
21
35
|
|
22
|
-
def process_request(res)
|
23
36
|
doc = Nokogiri::XML(res.body)
|
24
37
|
doc.remove_namespaces!
|
25
|
-
|
26
|
-
|
27
|
-
|
38
|
+
|
39
|
+
return doc
|
40
|
+
end
|
41
|
+
|
42
|
+
def get_resumption_token(doc)
|
28
43
|
resumption_token = doc.xpath('//resumptionToken')
|
29
|
-
|
44
|
+
if resumption_token.length == 1
|
45
|
+
return resumption_token
|
46
|
+
else
|
47
|
+
return false
|
48
|
+
end
|
30
49
|
end
|
31
|
-
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
50
|
+
|
51
|
+
def process_request(doc)
|
52
|
+
token = get_resumption_token(doc)
|
53
|
+
if token != false
|
54
|
+
records = doc.xpath('//record')
|
55
|
+
append_to_collection(records)
|
56
|
+
return token
|
57
|
+
else
|
58
|
+
return false
|
59
|
+
end
|
36
60
|
end
|
37
61
|
|
38
62
|
def get_records(token)
|
39
|
-
params = { :verb => 'ListRecords', :resumptionToken => token }
|
40
|
-
|
41
|
-
resumption_token = process_request(
|
42
|
-
|
63
|
+
params = { :verb => 'ListRecords', :resumptionToken => token[0].text }
|
64
|
+
doc = oai_request(self.oai_base_uri,params)
|
65
|
+
resumption_token = process_request(doc)
|
66
|
+
|
67
|
+
if resumption_token != false
|
43
68
|
get_records(resumption_token)
|
44
69
|
else
|
45
|
-
return self.marc_collection
|
70
|
+
return self.marc_collection.root
|
46
71
|
end
|
47
72
|
end
|
48
73
|
end
|
data/lib/miniharvest/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miniharvest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jamie Little
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-08-
|
11
|
+
date: 2016-08-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|