archivist-client 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +15 -0
  2. data/README.md +107 -0
  3. data/archivist-client.gemspec +37 -0
  4. data/lib/archivist.rb +2 -0
  5. data/lib/archivist/client/base.rb +63 -0
  6. data/lib/archivist/client/version.rb +5 -0
  7. data/lib/archivist/models.rb +7 -0
  8. data/lib/archivist/models/body.rb +15 -0
  9. data/lib/archivist/models/document.rb +48 -0
  10. data/lib/archivist/models/file_format.rb +18 -0
  11. data/lib/archivist/models/format_index.rb +20 -0
  12. data/lib/archivist/models/header.rb +15 -0
  13. data/lib/archivist/models/parameters.rb +17 -0
  14. data/lib/archivist/models/query_response.rb +23 -0
  15. data/lib/archivist/representations.rb +9 -0
  16. data/lib/archivist/representations/body.rb +19 -0
  17. data/lib/archivist/representations/document.rb +17 -0
  18. data/lib/archivist/representations/file_format.rb +21 -0
  19. data/lib/archivist/representations/format_index.rb +17 -0
  20. data/lib/archivist/representations/header.rb +19 -0
  21. data/lib/archivist/representations/parameters.rb +19 -0
  22. data/lib/archivist/representations/query_response.rb +21 -0
  23. data/spec/client_spec.rb +17 -0
  24. data/spec/fixtures/document.json +7 -0
  25. data/spec/fixtures/download_files.xml +109 -0
  26. data/spec/fixtures/query-response.json +1 -0
  27. data/spec/fixtures/vcr/download.yml +1506 -0
  28. data/spec/fixtures/vcr/links.yml +127 -0
  29. data/spec/fixtures/vcr/search.yml +110 -0
  30. data/spec/models/body_spec.rb +7 -0
  31. data/spec/models/document_spec.rb +27 -0
  32. data/spec/models/file_format_spec.rb +9 -0
  33. data/spec/models/format_index_spec.rb +9 -0
  34. data/spec/models/header_spec.rb +7 -0
  35. data/spec/models/parameters_spec.rb +7 -0
  36. data/spec/models/query_response_spec.rb +7 -0
  37. data/spec/representations/document_spec.rb +17 -0
  38. data/spec/representations/format_index_spec.rb +32 -0
  39. data/spec/representations/query_response_spec.rb +28 -0
  40. data/spec/spec_helper.rb +12 -0
  41. metadata +272 -0
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OTNjMjQ1ZTA3M2RkNzA3NzA0MDg1ZjY2MmMwMzYwODYzYzM1ZTZhMA==
5
+ data.tar.gz: !binary |-
6
+ ZGYzZGU5NjBlNzI4NGNkZGM4NmVkNWU1NjY0NTM3OGRhM2ZhYjM4OQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MDQ4YzFhNzM2YjE2YjdhYmU1YTU2MzJlZDE4Nzc4NDM0YmJlZWUyYTI1OTdj
10
+ ZTUwNzk0MmUyOTdmMjRjMjYzZmVmMDM0NjhiZjA5ZDc4NmMyNzQ5YzdiM2Nh
11
+ NmZhNDVjYTQyNjQ3YjI4MzQ4ZmE0YTY4ZjRjOGNkZTU2MjFjYmU=
12
+ data.tar.gz: !binary |-
13
+ NGYzZmVmNTY2ZmEzZDM0NGI0OTJhOTNmZWUyNjViYTk3MGI1NzgxMWYxNTI1
14
+ NTU0YWE1YjJhYzgxOTY3ZDcyNzFiZWMxZGI5ZDZkNDJjM2Y1YmRiNmY0MTZh
15
+ M2M5M2EzZWNiZjNiNTlkMzkxNDBlYzU2ZTBmN2EyNDY2NWJlYWQ=
@@ -0,0 +1,107 @@
1
+ Archivist::Client
2
+ =========
3
+
4
+ an *archive.org* Ruby client
5
+
6
+ | Project | Archivist::Client |
7
+ |------------------------ | ----------------- |
8
+ | gem name | archivist-client |
9
+ | license | MIT |
10
+ | moldiness | [![Maintainer Status](http://stillmaintained.com/wordtreefoundation/archive-client.png)](http://stillmaintained.com/wordtreefoundation/archive-client) |
11
+ | version | [![Gem Version](https://badge.fury.io/rb/archive-client.png)](http://badge.fury.io/rb/archive-client) |
12
+ | dependencies | [![Dependency Status](https://gemnasium.com/wordtreefoundation/archive-client.png)](https://gemnasium.com/wordtreefoundation/archive-client) |
13
+ | code quality | [![Code Climate](https://codeclimate.com/github/wordtreefoundation/archive-client.png)](https://codeclimate.com/github/wordtreefoundation/archive-client) |
14
+ | continuous integration | [![Build Status](https://secure.travis-ci.org/wordtreefoundation/archive-client.png?branch=master)](https://travis-ci.org/wordtreefoundation/archive-client) |
15
+ | test coverage | [![Coverage Status](https://coveralls.io/repos/wordtreefoundation/archive-client/badge.png)](https://coveralls.io/r/wordtreefoundation/archive-client) |
16
+ | homepage | [https://github.com/wordtreefoundation/archive-client][homepage] |
17
+ | documentation | [http://rdoc.info/github/wordtreefoundation/archive-client/frames][documentation] |
18
+ | authors | [Duane Johnson](https://coderbits.com/canadaduane) [![Endorse Duane](https://api.coderwall.com/canadaduane/endorsecount.png)](http://coderwall.com/canadaduane) |
19
+ | | [Peter Boling](https://coderbits.com/pboling) [![Endorse Peter](https://api.coderwall.com/pboling/endorsecount.png)](http://coderwall.com/pboling) |
20
+
21
+
22
+ Getting Started
23
+ ---------------
24
+
25
+ ```ruby
26
+ require 'archivist'
27
+ ```
28
+
29
+ Create an Archivist client:
30
+ ```ruby
31
+ client = Archivist::Client::Base.new
32
+ ```
33
+
34
+ Search for the books you're interested in:
35
+ ```ruby
36
+ books = client.search(:start_year => 1500, :end_year => 1510)
37
+ ```
38
+
39
+ Download them:
40
+ ```ruby
41
+ books.each do |book|
42
+ puts book.download
43
+ end
44
+ ```
45
+
46
+ ## Contributors
47
+
48
+ See the [Network View](https://github.com/wordtreefoundation/archive-client/network) and the [CHANGELOG](https://github.com/wordtreefoundation/archive-client/blob/master/CHANGELOG.md)
49
+
50
+ ## How you can help!
51
+
52
+ Take a look at the `reek` list, which is the file called `REEK`, and start fixing things. Once you complete a change, run the tests:
53
+
54
+ ```
55
+ bundle exec rspec spec
56
+ ```
57
+
58
+ If the tests pass, refresh the `reek` list:
59
+
60
+ ```
61
+ bundle exec rake reek > REEK
62
+ ```
63
+
64
+ Follow the instructions for "Contributing" below.
65
+
66
+ ## Contributing
67
+
68
+ 1. Fork it
69
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
70
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
71
+ 4. Push to the branch (`git push origin my-new-feature`)
72
+ 5. Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
73
+ 6. Create new Pull Request
74
+
75
+ ## Versioning
76
+
77
+ This library aims to adhere to [Semantic Versioning 2.0.0][semver].
78
+ Violations of this scheme should be reported as bugs. Specifically,
79
+ if a minor or patch version is released that breaks backward
80
+ compatibility, a new version should be immediately released that
81
+ restores compatibility. Breaking changes to the public API will
82
+ only be introduced with new major versions.
83
+
84
+ As a result of this policy, you can (and should) specify a
85
+ dependency on this gem using the [Pessimistic Version Constraint][pvc] with two digits of precision.
86
+
87
+ For example:
88
+
89
+ spec.add_dependency 'archivist-client', '~> 0.1'
90
+
91
+ ## References
92
+
93
+ * [Source Code](http://github.com/wordtreefoundation/archive-client)
94
+ * [Release Announcement](http://www.textual-analysis.org/2013/11/archivist/)
95
+
96
+ ## Legal
97
+
98
+ * MIT License - See LICENSE file in this project
99
+ * Copyright (c) 2013 Duane Johnson & Word Tree Foundation
100
+
101
+ [semver]: http://semver.org/
102
+ [pvc]: http://docs.rubygems.org/read/chapter/16#page74
103
+ [documentation]: http://rdoc.info/github/wordtreefoundation/archive-client/frames
104
+ [homepage]: https://github.com/wordtreefoundation/archive-client
105
+
106
+ [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/wordtreefoundation/archive-client/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
107
+
@@ -0,0 +1,37 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'archivist/client/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "archivist-client"
8
+ gem.summary = "archivist-client is an archive.org client library"
9
+ gem.description = "archivist-client queries archive.org for book data and downloads some things."
10
+ gem.homepage = "http://github.com/canadaduane/archivist-client"
11
+ gem.authors = ['Duane Johnson','Peter Boling']
12
+ gem.email = ['duane.johnson@gmail.com', 'peter.boling@gmail.com']
13
+ gem.licenses = ["MIT"]
14
+
15
+ gem.files = %w[archivist-client.gemspec README.md]
16
+ gem.files += Dir.glob("lib/**/*.rb")
17
+ gem.files += Dir.glob("spec/**/*")
18
+
19
+ gem.test_files = Dir.glob("spec/**/*")
20
+ gem.require_paths = ["lib"]
21
+ gem.version = Archivist::Client::VERSION
22
+ gem.required_ruby_version = '>= 1.9.0'
23
+
24
+ gem.add_development_dependency "rake"
25
+ gem.add_development_dependency "bundler", ">= 1.0.0"
26
+ gem.add_development_dependency "rspec", "~> 2.6"
27
+ gem.add_development_dependency "debugger"
28
+ gem.add_development_dependency "webmock"
29
+ gem.add_development_dependency "vcr"
30
+ gem.add_development_dependency "reek", ">= 1.2.8"
31
+ gem.add_development_dependency "roodi", ">= 2.1.0"
32
+
33
+ gem.add_dependency "virtus", "~> 1.0.0"
34
+ gem.add_dependency "representable"
35
+ gem.add_dependency "faraday"
36
+ gem.add_dependency "faraday_middleware"
37
+ end
@@ -0,0 +1,2 @@
1
+ require 'archivist/client/base'
2
+ require 'archivist/client/version'
@@ -0,0 +1,63 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+ require 'archivist/representations'
4
+
5
+ module Archivist
6
+ module Client
7
+ class Base
8
+ attr_reader :conn
9
+
10
+ def initialize(opts = {})
11
+ @opts = {
12
+ page: 1,
13
+ rows: 50
14
+ }.merge(opts)
15
+
16
+ @conn = Faraday.new(url: 'http://archive.org') do |faraday|
17
+ faraday.use FaradayMiddleware::FollowRedirects
18
+ faraday.request :url_encoded # form-encode POST params
19
+ faraday.adapter Faraday.default_adapter # make requests with Net::HTTP
20
+ end
21
+ end
22
+
23
+ def query(opts)
24
+ filters = [
25
+ 'mediatype:texts',
26
+ '-mediatype:collection'
27
+ ]
28
+
29
+ filters.concat(opts.delete(:filters)) if opts[:filters]
30
+
31
+ filters << if opts[:language]
32
+ "language:#{opts.delete(:language)}"
33
+ else
34
+ '(language:eng OR language:English)'
35
+ end
36
+
37
+ if opts[:start_year] && opts[:end_year]
38
+ start_year = "#{opts.delete(:start_year)}-01-01"
39
+ end_year = "#{opts.delete(:end_year)}-12-31"
40
+ filters << "date:[#{start_year} TO #{end_year}]"
41
+ end
42
+
43
+ filters.join(' AND ')
44
+ end
45
+
46
+ def params(opts = {})
47
+ {
48
+ q: query(opts),
49
+ fl: %w(identifier title creator date language mediattype),
50
+ sort: ['date asc'],
51
+ output: 'json'
52
+ }.merge(@opts).merge(opts)
53
+ end
54
+
55
+ def search(opts = {})
56
+ Model::QueryResponse.new.tap do |qr|
57
+ response = @conn.get('/advancedsearch.php', params(opts))
58
+ Representation::QueryResponse.new(qr).from_json(response.body)
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,5 @@
1
+ module Archivist
2
+ module Client
3
+ VERSION = '0.1.1'
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ require 'archivist/models/body'
2
+ require 'archivist/models/document'
3
+ require 'archivist/models/file_format'
4
+ require 'archivist/models/format_index'
5
+ require 'archivist/models/header'
6
+ require 'archivist/models/parameters'
7
+ require 'archivist/models/query_response'
@@ -0,0 +1,15 @@
1
+ require 'virtus'
2
+
3
+ require_relative 'document'
4
+
5
+ module Archivist
6
+ module Model
7
+ class Body
8
+ include Virtus.model
9
+
10
+ attribute :num_found, Integer
11
+ attribute :start, Integer
12
+ attribute :docs, Array[Document]
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,48 @@
1
+ require 'virtus'
2
+ require 'date'
3
+
4
+ module Archivist
5
+ module Model
6
+ class Document
7
+ include Virtus.model
8
+
9
+ attribute :identifier, String
10
+ attribute :title, String
11
+ attribute :date, Date
12
+ attribute :languages, Array[String]
13
+ attribute :creators, Array[String]
14
+
15
+ def initialize
16
+ @conn = Faraday.new(url: "http://archive.org") do |faraday|
17
+ faraday.use FaradayMiddleware::FollowRedirects
18
+ faraday.request :url_encoded # form-encode POST params
19
+ # faraday.response :logger # log requests to STDOUT
20
+ faraday.adapter Faraday.default_adapter # make requests with Net::HTTP
21
+ end
22
+ end
23
+
24
+ def format_index
25
+ response = @conn.get(index_xml_path)
26
+ Model::FormatIndex.new.tap do |idx|
27
+ Representation::FormatIndex.new(idx).from_xml(response.body)
28
+ end
29
+ end
30
+
31
+ def download(format=:text)
32
+ # e.g. format_index.text_format
33
+ file_format = format_index.send(:"#{format}_format")
34
+ # e.g. /download/firstbooknapole00gruagoog/firstbooknapole00gruagoog_djvu.txt
35
+ @conn.get(download_path(file_format.name)).
36
+ body.force_encoding('UTF-8')
37
+ end
38
+
39
+ def download_path(file)
40
+ "/download/#{identifier}/#{file}"
41
+ end
42
+
43
+ def index_xml_path
44
+ download_path("#{identifier}_files.xml")
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,18 @@
1
+ require 'virtus'
2
+
3
+ module Archivist
4
+ module Model
5
+ class FileFormat
6
+ include Virtus.model
7
+
8
+ attribute :name
9
+ attribute :source
10
+ attribute :format
11
+ attribute :mtime
12
+ attribute :size
13
+ attribute :md5
14
+ attribute :crc32
15
+ attribute :sha1
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,20 @@
1
+ require 'virtus'
2
+
3
+ module Archivist
4
+ module Model
5
+ class FormatIndex
6
+ include Virtus.model
7
+
8
+ attribute :formats
9
+
10
+ def find(key)
11
+ formats.find{ |f| f.format == key }
12
+ end
13
+ alias_method :[], :find
14
+
15
+ def text_format
16
+ find("DjVuTXT")
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,15 @@
1
+ require 'virtus'
2
+
3
+ require_relative 'parameters'
4
+
5
+ module Archivist
6
+ module Model
7
+ class Header
8
+ include Virtus.model
9
+
10
+ attribute :status, Integer
11
+ attribute :qtime, Integer
12
+ attribute :params, Parameters
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,17 @@
1
+ require 'virtus'
2
+
3
+ module Archivist
4
+ module Model
5
+ class Parameters
6
+ include Virtus.model
7
+
8
+ attribute :rows, Integer
9
+ attribute :format, String
10
+ attribute :start, Integer
11
+ attribute :q, String
12
+ attribute :qin, String
13
+ attribute :fl, String
14
+ attribute :sort, String
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,23 @@
1
+ require 'virtus'
2
+
3
+ require_relative 'header'
4
+ require_relative 'body'
5
+
6
+ module Archivist
7
+ module Model
8
+ class QueryResponse
9
+ include Virtus.model
10
+
11
+ attribute :header, Header
12
+ attribute :body, Body
13
+
14
+ def each(&block)
15
+ body && body.docs.each(&block)
16
+ end
17
+
18
+ def empty?
19
+ body && body.docs.empty?
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,9 @@
1
+ require 'archivist/models'
2
+
3
+ require 'archivist/representations/body'
4
+ require 'archivist/representations/document'
5
+ require 'archivist/representations/file_format'
6
+ require 'archivist/representations/format_index'
7
+ require 'archivist/representations/header'
8
+ require 'archivist/representations/parameters'
9
+ require 'archivist/representations/query_response'
@@ -0,0 +1,19 @@
1
+ require 'archivist/models'
2
+ require 'representable'
3
+ require 'representable/json'
4
+
5
+ require_relative 'document'
6
+
7
+ module Archivist
8
+ module Representation
9
+ class Body < Representable::Decorator
10
+ include Representable::JSON
11
+
12
+ property :num_found, as: "numFound"
13
+ property :start
14
+ collection :docs,
15
+ class: Model::Document,
16
+ decorator: Document
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,17 @@
1
+ require 'archivist/models'
2
+ require 'representable'
3
+ require 'representable/json'
4
+
5
+ module Archivist
6
+ module Representation
7
+ class Document < Representable::Decorator
8
+ include Representable::JSON
9
+
10
+ property :identifier
11
+ property :title
12
+ property :date
13
+ collection :languages, as: "language"
14
+ collection :creators, as: "creator"
15
+ end
16
+ end
17
+ end