archivist-client 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/README.md +107 -0
- data/archivist-client.gemspec +37 -0
- data/lib/archivist.rb +2 -0
- data/lib/archivist/client/base.rb +63 -0
- data/lib/archivist/client/version.rb +5 -0
- data/lib/archivist/models.rb +7 -0
- data/lib/archivist/models/body.rb +15 -0
- data/lib/archivist/models/document.rb +48 -0
- data/lib/archivist/models/file_format.rb +18 -0
- data/lib/archivist/models/format_index.rb +20 -0
- data/lib/archivist/models/header.rb +15 -0
- data/lib/archivist/models/parameters.rb +17 -0
- data/lib/archivist/models/query_response.rb +23 -0
- data/lib/archivist/representations.rb +9 -0
- data/lib/archivist/representations/body.rb +19 -0
- data/lib/archivist/representations/document.rb +17 -0
- data/lib/archivist/representations/file_format.rb +21 -0
- data/lib/archivist/representations/format_index.rb +17 -0
- data/lib/archivist/representations/header.rb +19 -0
- data/lib/archivist/representations/parameters.rb +19 -0
- data/lib/archivist/representations/query_response.rb +21 -0
- data/spec/client_spec.rb +17 -0
- data/spec/fixtures/document.json +7 -0
- data/spec/fixtures/download_files.xml +109 -0
- data/spec/fixtures/query-response.json +1 -0
- data/spec/fixtures/vcr/download.yml +1506 -0
- data/spec/fixtures/vcr/links.yml +127 -0
- data/spec/fixtures/vcr/search.yml +110 -0
- data/spec/models/body_spec.rb +7 -0
- data/spec/models/document_spec.rb +27 -0
- data/spec/models/file_format_spec.rb +9 -0
- data/spec/models/format_index_spec.rb +9 -0
- data/spec/models/header_spec.rb +7 -0
- data/spec/models/parameters_spec.rb +7 -0
- data/spec/models/query_response_spec.rb +7 -0
- data/spec/representations/document_spec.rb +17 -0
- data/spec/representations/format_index_spec.rb +32 -0
- data/spec/representations/query_response_spec.rb +28 -0
- data/spec/spec_helper.rb +12 -0
- metadata +272 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OTNjMjQ1ZTA3M2RkNzA3NzA0MDg1ZjY2MmMwMzYwODYzYzM1ZTZhMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZGYzZGU5NjBlNzI4NGNkZGM4NmVkNWU1NjY0NTM3OGRhM2ZhYjM4OQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDQ4YzFhNzM2YjE2YjdhYmU1YTU2MzJlZDE4Nzc4NDM0YmJlZWUyYTI1OTdj
|
10
|
+
ZTUwNzk0MmUyOTdmMjRjMjYzZmVmMDM0NjhiZjA5ZDc4NmMyNzQ5YzdiM2Nh
|
11
|
+
NmZhNDVjYTQyNjQ3YjI4MzQ4ZmE0YTY4ZjRjOGNkZTU2MjFjYmU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NGYzZmVmNTY2ZmEzZDM0NGI0OTJhOTNmZWUyNjViYTk3MGI1NzgxMWYxNTI1
|
14
|
+
NTU0YWE1YjJhYzgxOTY3ZDcyNzFiZWMxZGI5ZDZkNDJjM2Y1YmRiNmY0MTZh
|
15
|
+
M2M5M2EzZWNiZjNiNTlkMzkxNDBlYzU2ZTBmN2EyNDY2NWJlYWQ=
|
data/README.md
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
Archivist::Client
|
2
|
+
=========
|
3
|
+
|
4
|
+
an *archive.org* Ruby client
|
5
|
+
|
6
|
+
| Project | Archivist::Client |
|
7
|
+
|------------------------ | ----------------- |
|
8
|
+
| gem name | archivist-client |
|
9
|
+
| license | MIT |
|
10
|
+
| moldiness | [](http://stillmaintained.com/wordtreefoundation/archive-client) |
|
11
|
+
| version | [](http://badge.fury.io/rb/archive-client) |
|
12
|
+
| dependencies | [](https://gemnasium.com/wordtreefoundation/archive-client) |
|
13
|
+
| code quality | [](https://codeclimate.com/github/wordtreefoundation/archive-client) |
|
14
|
+
| continuous integration | [](https://travis-ci.org/wordtreefoundation/archive-client) |
|
15
|
+
| test coverage | [](https://coveralls.io/r/wordtreefoundation/archive-client) |
|
16
|
+
| homepage | [https://github.com/wordtreefoundation/archive-client][homepage] |
|
17
|
+
| documentation | [http://rdoc.info/github/wordtreefoundation/archive-client/frames][documentation] |
|
18
|
+
| authors | [Duane Johnson](https://coderbits.com/canadaduane) [](http://coderwall.com/canadaduane) |
|
19
|
+
| | [Peter Boling](https://coderbits.com/pboling) [](http://coderwall.com/pboling) |
|
20
|
+
|
21
|
+
|
22
|
+
Getting Started
|
23
|
+
---------------
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require 'archivist'
|
27
|
+
```
|
28
|
+
|
29
|
+
Create an Archivist client:
|
30
|
+
```ruby
|
31
|
+
client = Archivist::Client::Base.new
|
32
|
+
```
|
33
|
+
|
34
|
+
Search for the books you're interested in:
|
35
|
+
```ruby
|
36
|
+
books = client.search(:start_year => 1500, :end_year => 1510)
|
37
|
+
```
|
38
|
+
|
39
|
+
Download them:
|
40
|
+
```ruby
|
41
|
+
books.each do |book|
|
42
|
+
puts book.download
|
43
|
+
end
|
44
|
+
```
|
45
|
+
|
46
|
+
## Contributors
|
47
|
+
|
48
|
+
See the [Network View](https://github.com/wordtreefoundation/archive-client/network) and the [CHANGELOG](https://github.com/wordtreefoundation/archive-client/blob/master/CHANGELOG.md)
|
49
|
+
|
50
|
+
## How you can help!
|
51
|
+
|
52
|
+
Take a look at the `reek` list, which is the file called `REEK`, and start fixing things. Once you complete a change, run the tests:
|
53
|
+
|
54
|
+
```
|
55
|
+
bundle exec rspec spec
|
56
|
+
```
|
57
|
+
|
58
|
+
If the tests pass refresh the `reek` list:
|
59
|
+
|
60
|
+
```
|
61
|
+
bundle exec rake reek > REEK
|
62
|
+
```
|
63
|
+
|
64
|
+
Follow the instructions for "Contributing" below.
|
65
|
+
|
66
|
+
## Contributing
|
67
|
+
|
68
|
+
1. Fork it
|
69
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
70
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
71
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
72
|
+
5. Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
73
|
+
6. Create new Pull Request
|
74
|
+
|
75
|
+
## Versioning
|
76
|
+
|
77
|
+
This library aims to adhere to [Semantic Versioning 2.0.0][semver].
|
78
|
+
Violations of this scheme should be reported as bugs. Specifically,
|
79
|
+
if a minor or patch version is released that breaks backward
|
80
|
+
compatibility, a new version should be immediately released that
|
81
|
+
restores compatibility. Breaking changes to the public API will
|
82
|
+
only be introduced with new major versions.
|
83
|
+
|
84
|
+
As a result of this policy, you can (and should) specify a
|
85
|
+
dependency on this gem using the [Pessimistic Version Constraint][pvc] with two digits of precision.
|
86
|
+
|
87
|
+
For example:
|
88
|
+
|
89
|
+
spec.add_dependency 'archivist-client', '~> 0.1.0'
|
90
|
+
|
91
|
+
## References
|
92
|
+
|
93
|
+
* [Source Code](http://github.com/wordtreefoundation/archive-client)
|
94
|
+
* [Release Announcement](http://www.textual-analysis.org/2013/11/archivist/)
|
95
|
+
|
96
|
+
## Legal
|
97
|
+
|
98
|
+
* MIT License - See LICENSE file in this project
|
99
|
+
* Copyright (c) 2013 Duane Johnson & Word Tree Foundation
|
100
|
+
|
101
|
+
[semver]: http://semver.org/
|
102
|
+
[pvc]: http://docs.rubygems.org/read/chapter/16#page74
|
103
|
+
[documentation]: http://rdoc.info/github/wordtreefoundation/archive-client/frames
|
104
|
+
[homepage]: https://github.com/wordtreefoundation/archive-client
|
105
|
+
|
106
|
+
[](https://bitdeli.com/free "Bitdeli Badge")
|
107
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'archivist/client/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "archivist-client"
  spec.version     = Archivist::Client::VERSION
  spec.summary     = "archivist-client is an archive.org client library"
  spec.description = "archivist-client queries archive.org for book data and downloads some things."
  spec.homepage    = "http://github.com/canadaduane/archivist-client"
  spec.authors     = ['Duane Johnson','Peter Boling']
  spec.email       = ['duane.johnson@gmail.com', 'peter.boling@gmail.com']
  spec.licenses    = ["MIT"]

  # Packaged files: the gemspec and README, every Ruby file under lib/,
  # and the whole spec/ tree (which is also registered as the test files).
  spec_tree = Dir.glob("spec/**/*")
  spec.files = %w[archivist-client.gemspec README.md] +
               Dir.glob("lib/**/*.rb") +
               spec_tree
  spec.test_files    = spec_tree
  spec.require_paths = ["lib"]
  spec.required_ruby_version = '>= 1.9.0'

  # Development-only dependencies, as name => version requirements
  # (an empty list means "any version").
  {
    "rake"     => [],
    "bundler"  => [">= 1.0.0"],
    "rspec"    => ["~> 2.6"],
    "debugger" => [],
    "webmock"  => [],
    "vcr"      => [],
    "reek"     => [">= 1.2.8"],
    "roodi"    => [">= 2.1.0"]
  }.each do |dep_name, requirements|
    spec.add_development_dependency(dep_name, *requirements)
  end

  # Runtime dependencies.
  spec.add_dependency "virtus", "~> 1.0.0"
  %w[representable faraday faraday_middleware].each do |dep_name|
    spec.add_dependency dep_name
  end
end
|
data/lib/archivist.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'archivist/representations'
|
4
|
+
|
5
|
+
module Archivist
  module Client
    # Client for the archive.org advanced-search endpoint.
    #
    # Holds a Faraday connection to http://archive.org and a set of default
    # request parameters that are merged into every search.
    class Base
      attr_reader :conn

      # @param opts [Hash] default request parameters; merged over the
      #   built-in defaults (page: 1, rows: 50).
      def initialize(opts = {})
        @opts = {
          page: 1,
          rows: 50
        }.merge(opts)

        @conn = Faraday.new(url: 'http://archive.org') do |faraday|
          faraday.use FaradayMiddleware::FollowRedirects
          faraday.request :url_encoded # form-encode POST params
          faraday.adapter Faraday.default_adapter # make requests with Net::HTTP
        end
      end

      # Builds the search query string from the query-related options.
      #
      # The keys :filters, :language, :start_year and :end_year are REMOVED
      # from +opts+ as they are consumed, so that whatever remains can be
      # passed on as plain request parameters (see #params).
      #
      # @param opts [Hash] options; mutated as described above.
      # @return [String] the filters joined with ' AND '.
      def query(opts)
        filters = [
          'mediatype:texts',
          '-mediatype:collection'
        ]

        filters.concat(opts.delete(:filters)) if opts[:filters]

        filters << if opts[:language]
          "language:#{opts.delete(:language)}"
        else
          '(language:eng OR language:English)'
        end

        # Always consume both year keys so that a lone :start_year or
        # :end_year is not forwarded as a stray request parameter. A date
        # filter is only added when both ends of the range are given.
        start_year = opts.delete(:start_year)
        end_year = opts.delete(:end_year)
        if start_year && end_year
          filters << "date:[#{start_year}-01-01 TO #{end_year}-12-31]"
        end

        filters.join(' AND ')
      end

      # Builds the full set of request parameters for a search.
      #
      # Precedence, lowest to highest: built-in defaults, the options given
      # to the constructor, then the non-query options given here.
      #
      # @param opts [Hash] per-call options; left untouched.
      # @return [Hash] request parameters.
      def params(opts = {})
        # Work on a copy: #query strips the keys it consumes, and the
        # caller's hash must not be modified by a search.
        opts = opts.dup

        {
          q: query(opts),
          fl: %w(identifier title creator date language mediatype),
          sort: ['date asc'],
          output: 'json'
        }.merge(@opts).merge(opts)
      end

      # Performs a GET on /advancedsearch.php with the parameters built by
      # #params and loads the JSON response body into a new
      # Model::QueryResponse through Representation::QueryResponse.
      #
      # @param opts [Hash] see #query and #params.
      # @return [Model::QueryResponse] the populated response object.
      def search(opts = {})
        Model::QueryResponse.new.tap do |qr|
          response = @conn.get('/advancedsearch.php', params(opts))
          Representation::QueryResponse.new(qr).from_json(response.body)
        end
      end
    end
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'archivist/models/body'
|
2
|
+
require 'archivist/models/document'
|
3
|
+
require 'archivist/models/file_format'
|
4
|
+
require 'archivist/models/format_index'
|
5
|
+
require 'archivist/models/header'
|
6
|
+
require 'archivist/models/parameters'
|
7
|
+
require 'archivist/models/query_response'
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Archivist
  module Model
    # A single search result, able to fetch its own file listing and
    # download one of its files from archive.org.
    class Document
      include Virtus.model

      attribute :identifier, String
      attribute :title, String
      attribute :date, Date
      attribute :languages, Array[String]
      attribute :creators, Array[String]

      # @param attributes [Hash, nil] optional attribute values, handed to
      #   the constructor provided by Virtus via +super+. Calling +new+
      #   with no arguments keeps working as before.
      def initialize(attributes = nil)
        super
        @conn = Faraday.new(url: "http://archive.org") do |faraday|
          faraday.use FaradayMiddleware::FollowRedirects
          faraday.request :url_encoded # form-encode POST params
          # faraday.response :logger # log requests to STDOUT
          faraday.adapter Faraday.default_adapter # make requests with Net::HTTP
        end
      end

      # Fetches this document's "<identifier>_files.xml" listing and parses
      # it into a Model::FormatIndex.
      #
      # @return [Model::FormatIndex]
      def format_index
        response = @conn.get(index_xml_path)
        Model::FormatIndex.new.tap do |idx|
          Representation::FormatIndex.new(idx).from_xml(response.body)
        end
      end

      # Downloads the file listed for the given format.
      #
      # @param format [Symbol, String] prefix of a "<format>_format" lookup
      #   method on the format index, e.g. :text for #text_format.
      # @return [String] the response body, tagged as UTF-8.
      def download(format=:text)
        # e.g. format_index.text_format
        # public_send: only the index's public lookup methods are reachable
        # through a caller-supplied format name.
        file_format = format_index.public_send(:"#{format}_format")
        # e.g. /download/firstbooknapole00gruagoog/firstbooknapole00gruagoog_djvu.txt
        @conn.get(download_path(file_format.name)).
          body.force_encoding('UTF-8')
      end

      # @param file [String] file name within this document's directory.
      # @return [String] request path of the file.
      def download_path(file)
        "/download/#{identifier}/#{file}"
      end

      # @return [String] request path of this document's file listing.
      def index_xml_path
        download_path("#{identifier}_files.xml")
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # One file entry of a document: its name, source, format label,
    # modification time, size and checksums. All attributes are untyped.
    class FileFormat
      include Virtus.model

      [:name, :source, :format, :mtime, :size, :md5, :crc32, :sha1].each do |field|
        attribute field
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # The list of file formats available for a document.
    class FormatIndex
      include Virtus.model

      attribute :formats

      # Looks up the first entry whose +format+ equals +key+.
      #
      # @param key [String] format label, e.g. "DjVuTXT".
      # @return [Object, nil] the matching entry, or nil when there is no
      #   match or no formats have been loaded yet.
      def find(key)
        # +formats+ is nil until something populates it; treat that as an
        # empty index instead of raising NoMethodError.
        (formats || []).find { |entry| entry.format == key }
      end
      alias_method :[], :find

      # @return [Object, nil] the entry for the "DjVuTXT" format, if any.
      def text_format
        find("DjVuTXT")
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # Typed holder for the request parameters echoed back in a response.
    class Parameters
      include Virtus.model

      # Attribute name => declared type, in declaration order.
      {
        rows:   Integer,
        format: String,
        start:  Integer,
        q:      String,
        qin:    String,
        fl:     String,
        sort:   String
      }.each do |field, type|
        attribute field, type
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
require_relative 'header'
|
4
|
+
require_relative 'body'
|
5
|
+
|
6
|
+
module Archivist
  module Model
    # A search response: a header plus a body holding the matched documents.
    class QueryResponse
      include Virtus.model

      attribute :header, Header
      attribute :body, Body

      # Iterates over the documents in the body, if a body is present.
      #
      # @return whatever +body.docs.each+ returns, or nil/false when there
      #   is no body.
      def each(&block)
        body && body.docs.each(&block)
      end

      # @return [Boolean] true when there is no body or the body holds no
      #   documents. A missing body counts as empty so that this agrees
      #   with #each, which yields nothing in that case.
      def empty?
        body.nil? || body.docs.empty?
      end
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
|
3
|
+
require 'archivist/representations/body'
|
4
|
+
require 'archivist/representations/document'
|
5
|
+
require 'archivist/representations/file_format'
|
6
|
+
require 'archivist/representations/format_index'
|
7
|
+
require 'archivist/representations/header'
|
8
|
+
require 'archivist/representations/parameters'
|
9
|
+
require 'archivist/representations/query_response'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
require 'representable'
|
3
|
+
require 'representable/json'
|
4
|
+
|
5
|
+
require_relative 'document'
|
6
|
+
|
7
|
+
module Archivist
  module Representation
    # JSON mapping for a response body: "numFound" is exposed as
    # +num_found+, and each entry of "docs" is built as a Model::Document
    # and mapped with the Document representation.
    class Body < Representable::Decorator
      include Representable::JSON

      property :num_found, as: "numFound"
      property :start

      collection :docs, class: Model::Document, decorator: Representation::Document
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
require 'representable'
|
3
|
+
require 'representable/json'
|
4
|
+
|
5
|
+
module Archivist
  module Representation
    # JSON mapping for a single document. The JSON keys "language" and
    # "creator" are exposed as the collections +languages+ and +creators+.
    class Document < Representable::Decorator
      include Representable::JSON

      # Scalar properties that use the same name in JSON and on the model.
      [:identifier, :title, :date].each do |field|
        property field
      end

      # Collections whose JSON key is the singular form of the model name.
      [[:languages, "language"], [:creators, "creator"]].each do |field, json_key|
        collection field, as: json_key
      end
    end
  end
end
|