archivist-client 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/README.md +107 -0
- data/archivist-client.gemspec +37 -0
- data/lib/archivist.rb +2 -0
- data/lib/archivist/client/base.rb +63 -0
- data/lib/archivist/client/version.rb +5 -0
- data/lib/archivist/models.rb +7 -0
- data/lib/archivist/models/body.rb +15 -0
- data/lib/archivist/models/document.rb +48 -0
- data/lib/archivist/models/file_format.rb +18 -0
- data/lib/archivist/models/format_index.rb +20 -0
- data/lib/archivist/models/header.rb +15 -0
- data/lib/archivist/models/parameters.rb +17 -0
- data/lib/archivist/models/query_response.rb +23 -0
- data/lib/archivist/representations.rb +9 -0
- data/lib/archivist/representations/body.rb +19 -0
- data/lib/archivist/representations/document.rb +17 -0
- data/lib/archivist/representations/file_format.rb +21 -0
- data/lib/archivist/representations/format_index.rb +17 -0
- data/lib/archivist/representations/header.rb +19 -0
- data/lib/archivist/representations/parameters.rb +19 -0
- data/lib/archivist/representations/query_response.rb +21 -0
- data/spec/client_spec.rb +17 -0
- data/spec/fixtures/document.json +7 -0
- data/spec/fixtures/download_files.xml +109 -0
- data/spec/fixtures/query-response.json +1 -0
- data/spec/fixtures/vcr/download.yml +1506 -0
- data/spec/fixtures/vcr/links.yml +127 -0
- data/spec/fixtures/vcr/search.yml +110 -0
- data/spec/models/body_spec.rb +7 -0
- data/spec/models/document_spec.rb +27 -0
- data/spec/models/file_format_spec.rb +9 -0
- data/spec/models/format_index_spec.rb +9 -0
- data/spec/models/header_spec.rb +7 -0
- data/spec/models/parameters_spec.rb +7 -0
- data/spec/models/query_response_spec.rb +7 -0
- data/spec/representations/document_spec.rb +17 -0
- data/spec/representations/format_index_spec.rb +32 -0
- data/spec/representations/query_response_spec.rb +28 -0
- data/spec/spec_helper.rb +12 -0
- metadata +272 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OTNjMjQ1ZTA3M2RkNzA3NzA0MDg1ZjY2MmMwMzYwODYzYzM1ZTZhMA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZGYzZGU5NjBlNzI4NGNkZGM4NmVkNWU1NjY0NTM3OGRhM2ZhYjM4OQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDQ4YzFhNzM2YjE2YjdhYmU1YTU2MzJlZDE4Nzc4NDM0YmJlZWUyYTI1OTdj
|
10
|
+
ZTUwNzk0MmUyOTdmMjRjMjYzZmVmMDM0NjhiZjA5ZDc4NmMyNzQ5YzdiM2Nh
|
11
|
+
NmZhNDVjYTQyNjQ3YjI4MzQ4ZmE0YTY4ZjRjOGNkZTU2MjFjYmU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NGYzZmVmNTY2ZmEzZDM0NGI0OTJhOTNmZWUyNjViYTk3MGI1NzgxMWYxNTI1
|
14
|
+
NTU0YWE1YjJhYzgxOTY3ZDcyNzFiZWMxZGI5ZDZkNDJjM2Y1YmRiNmY0MTZh
|
15
|
+
M2M5M2EzZWNiZjNiNTlkMzkxNDBlYzU2ZTBmN2EyNDY2NWJlYWQ=
|
data/README.md
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
Archivist::Client
|
2
|
+
=========
|
3
|
+
|
4
|
+
an *archive.org* Ruby client
|
5
|
+
|
6
|
+
| Project | Archivist::Client |
|
7
|
+
|------------------------ | ----------------- |
|
8
|
+
| gem name | archivist-client |
|
9
|
+
| license | MIT |
|
10
|
+
| moldiness | [![Maintainer Status](http://stillmaintained.com/wordtreefoundation/archive-client.png)](http://stillmaintained.com/wordtreefoundation/archive-client) |
|
11
|
+
| version | [![Gem Version](https://badge.fury.io/rb/archive-client.png)](http://badge.fury.io/rb/archive-client) |
|
12
|
+
| dependencies | [![Dependency Status](https://gemnasium.com/wordtreefoundation/archive-client.png)](https://gemnasium.com/wordtreefoundation/archive-client) |
|
13
|
+
| code quality | [![Code Climate](https://codeclimate.com/github/wordtreefoundation/archive-client.png)](https://codeclimate.com/github/wordtreefoundation/archive-client) |
|
14
|
+
| continuous integration | [![Build Status](https://secure.travis-ci.org/wordtreefoundation/archive-client.png?branch=master)](https://travis-ci.org/wordtreefoundation/archive-client) |
|
15
|
+
| test coverage | [![Coverage Status](https://coveralls.io/repos/wordtreefoundation/archive-client/badge.png)](https://coveralls.io/r/wordtreefoundation/archive-client) |
|
16
|
+
| homepage | [https://github.com/wordtreefoundation/archive-client][homepage] |
|
17
|
+
| documentation | [http://rdoc.info/github/wordtreefoundation/archive-client/frames][documentation] |
|
18
|
+
| authors | [Duane Johnson](https://coderbits.com/canadaduane) [![Endorse Duane](https://api.coderwall.com/canadaduane/endorsecount.png)](http://coderwall.com/canadaduane) |
|
19
|
+
| | [Peter Boling](https://coderbits.com/pboling) [![Endorse Peter](https://api.coderwall.com/pboling/endorsecount.png)](http://coderwall.com/pboling) |
|
20
|
+
|
21
|
+
|
22
|
+
Getting Started
|
23
|
+
---------------
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require 'archivist'
|
27
|
+
```
|
28
|
+
|
29
|
+
Create an Archivist client:
|
30
|
+
```ruby
|
31
|
+
client = Archivist::Client::Base.new
|
32
|
+
```
|
33
|
+
|
34
|
+
Search for the books you're interested in:
|
35
|
+
```ruby
|
36
|
+
books = client.search(:start_year => 1500, :end_year => 1510)
|
37
|
+
```
|
38
|
+
|
39
|
+
Download them:
|
40
|
+
```ruby
|
41
|
+
books.each do |book|
|
42
|
+
puts book.download
|
43
|
+
end
|
44
|
+
```
|
45
|
+
|
46
|
+
## Contributors
|
47
|
+
|
48
|
+
See the [Network View](https://github.com/wordtreefoundation/archive-client/network) and the [CHANGELOG](https://github.com/wordtreefoundation/archive-client/blob/master/CHANGELOG.md)
|
49
|
+
|
50
|
+
## How you can help!
|
51
|
+
|
52
|
+
Take a look at the `reek` list, which is the file called `REEK`, and start fixing things. Once you complete a change, run the tests:
|
53
|
+
|
54
|
+
```
|
55
|
+
bundle exec rspec spec
|
56
|
+
```
|
57
|
+
|
58
|
+
If the tests pass refresh the `reek` list:
|
59
|
+
|
60
|
+
```
|
61
|
+
bundle exec rake reek > REEK
|
62
|
+
```
|
63
|
+
|
64
|
+
Follow the instructions for "Contributing" below.
|
65
|
+
|
66
|
+
## Contributing
|
67
|
+
|
68
|
+
1. Fork it
|
69
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
70
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
71
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
72
|
+
5. Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
73
|
+
6. Create new Pull Request
|
74
|
+
|
75
|
+
## Versioning
|
76
|
+
|
77
|
+
This library aims to adhere to [Semantic Versioning 2.0.0][semver].
|
78
|
+
Violations of this scheme should be reported as bugs. Specifically,
|
79
|
+
if a minor or patch version is released that breaks backward
|
80
|
+
compatibility, a new version should be immediately released that
|
81
|
+
restores compatibility. Breaking changes to the public API will
|
82
|
+
only be introduced with new major versions.
|
83
|
+
|
84
|
+
As a result of this policy, you can (and should) specify a
|
85
|
+
dependency on this gem using the [Pessimistic Version Constraint][pvc] with two digits of precision.
|
86
|
+
|
87
|
+
For example:
|
88
|
+
|
89
|
+
spec.add_dependency 'archivist-client', '~> 0.1.0'
|
90
|
+
|
91
|
+
## References
|
92
|
+
|
93
|
+
* [Source Code](http://github.com/wordtreefoundation/archive-client)
|
94
|
+
* [Release Announcement](http://www.textual-analysis.org/2013/11/archivist/)
|
95
|
+
|
96
|
+
## Legal
|
97
|
+
|
98
|
+
* MIT License - See LICENSE file in this project
|
99
|
+
* Copyright (c) 2013 Duane Johnson & Word Tree Foundation
|
100
|
+
|
101
|
+
[semver]: http://semver.org/
|
102
|
+
[pvc]: http://docs.rubygems.org/read/chapter/16#page74
|
103
|
+
[documentation]: http://rdoc.info/github/wordtreefoundation/archive-client/frames
|
104
|
+
[homepage]: https://github.com/wordtreefoundation/archive-client
|
105
|
+
|
106
|
+
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/wordtreefoundation/archive-client/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
|
107
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'archivist/client/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
  # --- Identity -----------------------------------------------------------
  gem.name        = "archivist-client"
  gem.version     = Archivist::Client::VERSION
  gem.summary     = "archivist-client is an archive.org client library"
  gem.description = "archivist-client queries archive.org for book data and downloads some things."
  gem.homepage    = "http://github.com/canadaduane/archivist-client"
  gem.licenses    = ["MIT"]

  # --- People -------------------------------------------------------------
  gem.authors = ['Duane Johnson','Peter Boling']
  gem.email   = ['duane.johnson@gmail.com', 'peter.boling@gmail.com']

  # --- Packaged files -----------------------------------------------------
  spec_files = Dir.glob("spec/**/*")

  gem.files         = %w[archivist-client.gemspec README.md] +
                      Dir.glob("lib/**/*.rb") +
                      Dir.glob("spec/**/*")
  gem.test_files    = spec_files
  gem.require_paths = ["lib"]

  gem.required_ruby_version = '>= 1.9.0'

  # --- Development dependencies (name followed by optional requirement) ---
  [
    ["rake"],
    ["bundler", ">= 1.0.0"],
    ["rspec", "~> 2.6"],
    ["debugger"],
    ["webmock"],
    ["vcr"],
    ["reek", ">= 1.2.8"],
    ["roodi", ">= 2.1.0"]
  ].each do |dependency|
    gem.add_development_dependency(*dependency)
  end

  # --- Runtime dependencies -----------------------------------------------
  [
    ["virtus", "~> 1.0.0"],
    ["representable"],
    ["faraday"],
    ["faraday_middleware"]
  ].each do |dependency|
    gem.add_dependency(*dependency)
  end
end
|
data/lib/archivist.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'faraday'
|
2
|
+
require 'faraday_middleware'
|
3
|
+
require 'archivist/representations'
|
4
|
+
|
5
|
+
module Archivist
  module Client
    # Client for the archive.org advanced search endpoint.
    #
    # Holds a Faraday connection (exposed as +conn+) and a set of default
    # request parameters that are merged into every search.
    class Base
      attr_reader :conn

      # @param opts [Hash] default request parameters; +page+ defaults to 1
      #   and +rows+ to 50 unless overridden here.
      def initialize(opts = {})
        @opts = {
          page: 1,
          rows: 50
        }.merge(opts)

        @conn = Faraday.new(url: 'http://archive.org') do |faraday|
          faraday.use FaradayMiddleware::FollowRedirects
          faraday.request :url_encoded             # form-encode POST params
          faraday.adapter Faraday.default_adapter  # make requests with Net::HTTP
        end
      end

      # Builds the search query string from the given options.
      #
      # Recognised keys are +:filters+ (one extra filter or an array of
      # them), +:language+, and the pair +:start_year+ / +:end_year+ (the
      # date range is only added when both are present).
      #
      # NOTE: this method removes the keys it consumes from +opts+, so the
      # hash passed in is modified. +params+ relies on that to know which
      # options are left over.
      #
      # @param opts [Hash] search options (modified in place)
      # @return [String] the individual filters joined with " AND "
      def query(opts)
        filters = [
          'mediatype:texts',
          '-mediatype:collection'
        ]

        # Array() lets callers pass a single filter string as well as an array.
        filters.concat(Array(opts.delete(:filters))) if opts[:filters]

        filters << if opts[:language]
          "language:#{opts.delete(:language)}"
        else
          '(language:eng OR language:English)'
        end

        if opts[:start_year] && opts[:end_year]
          start_year = "#{opts.delete(:start_year)}-01-01"
          end_year   = "#{opts.delete(:end_year)}-12-31"
          filters << "date:[#{start_year} TO #{end_year}]"
        end

        filters.join(' AND ')
      end

      # Builds the full set of request parameters for a search.
      #
      # Precedence, lowest to highest: the built-in parameters, the defaults
      # given to +initialize+, then whatever is left of +opts+ after +query+
      # has consumed its own keys.
      #
      # @param opts [Hash] search options; not modified
      # @return [Hash] request parameters
      def params(opts = {})
        # Work on a copy: +query+ deletes the keys it consumes, and the
        # caller's hash must not change as a side effect of a search.
        remaining = opts.dup

        {
          q: query(remaining),
          fl: %w(identifier title creator date language mediatype),
          sort: ['date asc'],
          output: 'json'
        }.merge(@opts).merge(remaining)
      end

      # Runs a search against +/advancedsearch.php+ and parses the JSON
      # response body into a query response model.
      #
      # @param opts [Hash] search options, see +query+ and +params+
      # @return [Model::QueryResponse]
      def search(opts = {})
        Model::QueryResponse.new.tap do |qr|
          response = @conn.get('/advancedsearch.php', params(opts))
          Representation::QueryResponse.new(qr).from_json(response.body)
        end
      end
    end
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'archivist/models/body'
|
2
|
+
require 'archivist/models/document'
|
3
|
+
require 'archivist/models/file_format'
|
4
|
+
require 'archivist/models/format_index'
|
5
|
+
require 'archivist/models/header'
|
6
|
+
require 'archivist/models/parameters'
|
7
|
+
require 'archivist/models/query_response'
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Archivist
  module Model
    # A single search result document, with helpers to look up its file
    # index and download one of its files over HTTP.
    class Document
      include Virtus.model

      attribute :identifier, String
      attribute :title, String
      attribute :date, Date
      attribute :languages, Array[String]
      attribute :creators, Array[String]

      # Accepts the same arguments as the Virtus-provided constructor (an
      # optional attributes hash) and forwards them, so that
      # +Document.new(identifier: "...")+ works and Virtus initialisation
      # is not skipped. Calling +Document.new+ with no arguments still works.
      def initialize(*args)
        super

        @conn = Faraday.new(url: "http://archive.org") do |faraday|
          faraday.use FaradayMiddleware::FollowRedirects
          faraday.request :url_encoded             # form-encode POST params
          # faraday.response :logger               # log requests to STDOUT
          faraday.adapter Faraday.default_adapter  # make requests with Net::HTTP
        end
      end

      # Fetches this document's +_files.xml+ listing and parses it.
      # Performs an HTTP request on every call.
      #
      # @return [Model::FormatIndex]
      def format_index
        response = @conn.get(index_xml_path)
        Model::FormatIndex.new.tap do |idx|
          Representation::FormatIndex.new(idx).from_xml(response.body)
        end
      end

      # Downloads the file of the requested format.
      #
      # @param format [Symbol, String] prefix of a +<format>_format+ lookup
      #   method on the format index, e.g. +:text+ for +text_format+
      # @return [String, nil] the response body tagged as UTF-8, or nil when
      #   the index has no file of that format
      def download(format=:text)
        # e.g. format_index.text_format
        file_format = format_index.public_send(:"#{format}_format")
        # Not every item has a file of every format; avoid calling #name on nil.
        return nil unless file_format

        # e.g. /download/firstbooknapole00gruagoog/firstbooknapole00gruagoog_djvu.txt
        @conn.get(download_path(file_format.name)).
          body.force_encoding('UTF-8')
      end

      # @param file [String] file name within this document's download directory
      # @return [String] request path for that file
      def download_path(file)
        "/download/#{identifier}/#{file}"
      end

      # @return [String] request path of this document's file listing
      def index_xml_path
        download_path("#{identifier}_files.xml")
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # Attribute holder for one file entry; every attribute is declared
    # without a type, so values are stored as given.
    class FileFormat
      include Virtus.model

      [:name, :source, :format, :mtime, :size, :md5, :crc32, :sha1].each do |field|
        attribute field
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # Collection of file format entries with lookup by format name.
    class FormatIndex
      include Virtus.model

      attribute :formats

      # Finds the first entry whose +format+ equals +key+.
      #
      # @param key [String] format name to look for, e.g. "DjVuTXT"
      # @return [Object, nil] the matching entry, or nil when there is none
      #   (including when +formats+ has not been populated)
      def find(key)
        # Array() turns an unset (nil) +formats+ into an empty list instead
        # of raising NoMethodError.
        Array(formats).find { |f| f.format == key }
      end
      alias_method :[], :find

      # @return [Object, nil] the entry whose format is "DjVuTXT", if any
      def text_format
        find("DjVuTXT")
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
module Archivist
  module Model
    # Typed attribute holder for a set of query parameters.
    class Parameters
      include Virtus.model

      # Attribute name => declared type, in declaration order.
      {
        :rows   => Integer,
        :format => String,
        :start  => Integer,
        :q      => String,
        :qin    => String,
        :fl     => String,
        :sort   => String
      }.each do |field, type|
        attribute field, type
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'virtus'
|
2
|
+
|
3
|
+
require_relative 'header'
|
4
|
+
require_relative 'body'
|
5
|
+
|
6
|
+
module Archivist
  module Model
    # Parsed search response: a header plus a body that carries the
    # matching documents.
    class QueryResponse
      include Virtus.model

      attribute :header, Header
      attribute :body, Body

      # Iterates over the documents in the body.
      # Does nothing (and returns nil) when there is no body.
      def each(&block)
        body && body.docs.each(&block)
      end

      # @return [Boolean] true when there is no body or the body has no
      #   documents. A response without a body yields nothing from +each+,
      #   so it is reported as empty rather than as nil.
      def empty?
        body.nil? || body.docs.empty?
      end
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
|
3
|
+
require 'archivist/representations/body'
|
4
|
+
require 'archivist/representations/document'
|
5
|
+
require 'archivist/representations/file_format'
|
6
|
+
require 'archivist/representations/format_index'
|
7
|
+
require 'archivist/representations/header'
|
8
|
+
require 'archivist/representations/parameters'
|
9
|
+
require 'archivist/representations/query_response'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
require 'representable'
|
3
|
+
require 'representable/json'
|
4
|
+
|
5
|
+
require_relative 'document'
|
6
|
+
|
7
|
+
module Archivist
  module Representation
    # JSON decorator for a response body: maps the "numFound" key onto
    # +num_found+, and builds each entry of +docs+ as a Model::Document
    # decorated by the Document representation.
    class Body < Representable::Decorator
      include Representable::JSON

      property :num_found, :as => "numFound"
      property :start

      collection :docs, :class => Model::Document, :decorator => Document
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'archivist/models'
|
2
|
+
require 'representable'
|
3
|
+
require 'representable/json'
|
4
|
+
|
5
|
+
module Archivist
  module Representation
    # JSON decorator for a single document. Scalar fields keep their own
    # names; the list fields are read from the singular JSON keys
    # "language" and "creator".
    class Document < Representable::Decorator
      include Representable::JSON

      [:identifier, :title, :date].each do |field|
        property field
      end

      [[:languages, "language"], [:creators, "creator"]].each do |field, json_key|
        collection field, :as => json_key
      end
    end
  end
end
|