archdown 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -0
- data/Gemfile.lock +107 -0
- data/Guardfile +8 -0
- data/LICENSE +20 -0
- data/README.md +16 -0
- data/bin/archdown +43 -0
- data/lib/archdown/download.rb +26 -0
- data/lib/archdown/librarian.rb +64 -0
- data/lib/archdown/library.rb +22 -0
- data/lib/archdown/version.rb +3 -0
- data/lib/archdown.rb +3 -0
- data/spec/archdown/library_spec.rb +18 -0
- data/spec/spec_helper.rb +25 -0
- metadata +175 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
archdown (0.3)
|
5
|
+
archivist-client
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
archivist-client (0.1.6)
|
11
|
+
faraday
|
12
|
+
faraday_middleware
|
13
|
+
representable
|
14
|
+
virtus (~> 1.0.0)
|
15
|
+
axiom-types (0.1.1)
|
16
|
+
descendants_tracker (~> 0.0.4)
|
17
|
+
ice_nine (~> 0.11.0)
|
18
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
19
|
+
celluloid (0.15.2)
|
20
|
+
timers (~> 1.1.0)
|
21
|
+
coderay (1.1.0)
|
22
|
+
coercible (1.0.0)
|
23
|
+
descendants_tracker (~> 0.0.1)
|
24
|
+
columnize (0.8.9)
|
25
|
+
debugger (1.6.8)
|
26
|
+
columnize (>= 0.3.1)
|
27
|
+
debugger-linecache (~> 1.2.0)
|
28
|
+
debugger-ruby_core_source (~> 1.3.5)
|
29
|
+
debugger-linecache (1.2.0)
|
30
|
+
debugger-ruby_core_source (1.3.5)
|
31
|
+
descendants_tracker (0.0.4)
|
32
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
33
|
+
diff-lcs (1.2.5)
|
34
|
+
equalizer (0.0.9)
|
35
|
+
faraday (0.9.0)
|
36
|
+
multipart-post (>= 1.2, < 3)
|
37
|
+
faraday_middleware (0.9.1)
|
38
|
+
faraday (>= 0.7.4, < 0.10)
|
39
|
+
ffi (1.9.3)
|
40
|
+
formatador (0.2.5)
|
41
|
+
guard (2.6.1)
|
42
|
+
formatador (>= 0.2.4)
|
43
|
+
listen (~> 2.7)
|
44
|
+
lumberjack (~> 1.0)
|
45
|
+
pry (>= 0.9.12)
|
46
|
+
thor (>= 0.18.1)
|
47
|
+
guard-rspec (4.2.10)
|
48
|
+
guard (~> 2.1)
|
49
|
+
rspec (>= 2.14, < 4.0)
|
50
|
+
ice_nine (0.11.0)
|
51
|
+
listen (2.7.9)
|
52
|
+
celluloid (>= 0.15.2)
|
53
|
+
rb-fsevent (>= 0.9.3)
|
54
|
+
rb-inotify (>= 0.9)
|
55
|
+
lumberjack (1.0.9)
|
56
|
+
method_source (0.8.2)
|
57
|
+
mini_portile (0.6.0)
|
58
|
+
multi_json (1.10.1)
|
59
|
+
multipart-post (2.0.0)
|
60
|
+
nokogiri (1.6.2.1)
|
61
|
+
mini_portile (= 0.6.0)
|
62
|
+
pry (0.10.0)
|
63
|
+
coderay (~> 1.1.0)
|
64
|
+
method_source (~> 0.8.1)
|
65
|
+
slop (~> 3.4)
|
66
|
+
rake (10.3.2)
|
67
|
+
rb-fsevent (0.9.4)
|
68
|
+
rb-inotify (0.9.5)
|
69
|
+
ffi (>= 0.5.0)
|
70
|
+
representable (1.8.5)
|
71
|
+
multi_json
|
72
|
+
nokogiri
|
73
|
+
uber
|
74
|
+
rspec (3.0.0)
|
75
|
+
rspec-core (~> 3.0.0)
|
76
|
+
rspec-expectations (~> 3.0.0)
|
77
|
+
rspec-mocks (~> 3.0.0)
|
78
|
+
rspec-core (3.0.2)
|
79
|
+
rspec-support (~> 3.0.0)
|
80
|
+
rspec-expectations (3.0.2)
|
81
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
82
|
+
rspec-support (~> 3.0.0)
|
83
|
+
rspec-mocks (3.0.2)
|
84
|
+
rspec-support (~> 3.0.0)
|
85
|
+
rspec-support (3.0.2)
|
86
|
+
slop (3.5.0)
|
87
|
+
thor (0.19.1)
|
88
|
+
thread_safe (0.3.4)
|
89
|
+
timers (1.1.0)
|
90
|
+
uber (0.0.7)
|
91
|
+
virtus (1.0.2)
|
92
|
+
axiom-types (~> 0.1)
|
93
|
+
coercible (~> 1.0)
|
94
|
+
descendants_tracker (~> 0.0.3)
|
95
|
+
equalizer (~> 0.0.9)
|
96
|
+
|
97
|
+
PLATFORMS
|
98
|
+
ruby
|
99
|
+
|
100
|
+
DEPENDENCIES
|
101
|
+
archdown!
|
102
|
+
bundler (~> 1.3)
|
103
|
+
debugger
|
104
|
+
guard
|
105
|
+
guard-rspec
|
106
|
+
rake
|
107
|
+
rspec
|
data/Guardfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 WordTree Foundation
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
archdown
|
2
|
+
========
|
3
|
+
|
4
|
+
Command-line utility to download books from archive.org using archivist-client
|
5
|
+
|
6
|
+
Example Usage
|
7
|
+
-------------
|
8
|
+
|
9
|
+
```sh
|
10
|
+
$ bundle install
|
11
|
+
$ bundle exec ruby bin/archdown -y 1700-1830
|
12
|
+
|
13
|
+
# ...
|
14
|
+
# downloads a lot of books into ./library
|
15
|
+
# ...
|
16
|
+
```
|
data/bin/archdown
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby

require "archdown"
require 'optparse'
require 'yaml' # metadata.to_yaml below should not rely on another file having loaded YAML

# Command-line entry point: download books (text files) from archive.org.
#
# Options:
#   -l, --library PATH   directory the books are stored under (default ./library)
#   -y, --year RANGE     publication year or year range, e.g. 1700 or 1700-1750
#
# Any leftover positional argument is treated as a usage error: the help text
# and the parsed state are printed and nothing is downloaded.

options = {
  :library_root => './library',
  :start_year => 1700,
  :end_year => 1700
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: archdown [options]"

  opts.on("-l", "--library PATH", "Store files in PATH") do |lib|
    options[:library_root] = lib
  end

  opts.on("-y", "--year RANGE", "Query for books published within RANGE years, e.g. 1700-1750") do |range|
    # Accept "YYYY" or "YYYY-YYYY" only. String#to_i would silently turn a
    # malformed value such as "abc" into year 0 and query the wrong range.
    years = /\A(\d+)(?:-(\d+))?\z/.match(range)
    raise OptionParser::InvalidArgument, range unless years

    options[:start_year] = years[1].to_i
    # A single year means a one-year range, as before.
    options[:end_year] = (years[2] || years[1]).to_i
  end
end
parser.parse!

if ARGV.empty?
  download = Archdown::Download.new(
    options[:library_root],
    :start_year => options[:start_year],
    :end_year => options[:end_year])
  # Echo each book's metadata as it is processed; the librarian argument is not needed here.
  download.go! do |metadata, _librarian|
    puts metadata.to_yaml
  end
else
  puts parser
  puts
  p options, ARGV
end
|
41
|
+
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'archivist/client'
|
2
|
+
|
3
|
+
require 'archdown/librarian'
|
4
|
+
require 'archdown/library'
|
5
|
+
|
6
|
+
module Archdown
  # Drives a bulk download: pages through the client's search results and
  # hands every hit to a Librarian, which stores it in the library.
  class Download
    attr_reader :library_root, :search_terms

    # library_root - path handed to Library.new; also exposed via #library_root.
    # search_terms - Hash of search parameters; :page is merged in per request.
    def initialize(library_root, search_terms)
      # Previously never assigned, so the library_root reader always returned nil.
      @library_root = library_root
      @library = Library.new(library_root)
      @search_terms = search_terms
      @client = Archivist::Client::Base.new
    end

    # Requests result pages starting at page 1 until a page comes back empty,
    # storing each book via Librarian#store_book. The optional block is
    # forwarded to store_book unchanged.
    def go!(&each_book)
      page = 1
      until (books = @client.search(@search_terms.merge(:page => page))).empty?
        books.each do |book|
          Librarian.new(@library, book).store_book(&each_book)
        end
        page += 1
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
require 'archivist/client'
|
5
|
+
|
6
|
+
module Archdown
  # The Librarian takes a book and puts it in the library: it downloads the
  # book's text and writes it, prefixed with a YAML metadata header, to a file
  # inside the directory the library assigns to the book's identifier.
  class Librarian
    attr_reader :library, :book

    # library - object responding to path_for_identifier(identifier).
    # book    - object responding to title, creators, date, identifier and download.
    def initialize(library, book)
      @library = library
      @book = book
      @failure = nil
    end

    # Returns a Hash describing the book. A 'failure' entry is present only
    # after store_book has recorded a download error.
    def metadata
      date = @book.date
      {
        'title' => @book.title,
        'author' => @book.creators ? @book.creators.join(';') : nil,
        # Guard against a missing date the same way 'author' guards against
        # missing creators; otherwise one undated book aborts the whole run.
        'year' => date ? date.year : nil,
        'source' => "http://archive.org/details/#{@book.identifier}",
        'status' => "OCR ONLY",
        'archive_org_id' => @book.identifier,
      }.tap do |meta|
        meta['failure'] = @failure if @failure
      end
    end

    # Directory the library assigns to this book.
    def book_dir
      @library.path_for_identifier(@book.identifier)
    end

    # Path of the file the book is written to: <book_dir>/<identifier>.md
    def book_filepath
      File.join(book_dir, @book.identifier + '.md')
    end

    # Creates the book directory, including missing parents.
    def make_book_dir
      FileUtils.mkdir_p(book_dir)
    end

    # Downloads the book and writes metadata plus text to book_filepath.
    # A failed download is not fatal: the error message is recorded under
    # 'failure' in the metadata and the file is written without body text.
    # Yields (metadata, self) to the block, if given, before writing.
    def store_book(&block)
      make_book_dir

      begin
        text = @book.download
      rescue Archivist::Model::Document::UnsupportedFormat, StandardError => e
        # Both cases were handled identically before; keep the message only.
        @failure = e.to_s
      end

      yield metadata, self if block_given?

      # YAML header, a closing '---' line, then the text when there is any.
      content = [metadata.to_yaml, "---\n", text].compact.join

      File.open(book_filepath, "w") do |file|
        file.write content
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Archdown
  # The Library knows about its locations for books (e.g. file directory).
  # Books are bucketed two levels deep, by the first and last two characters
  # of their identifier: <path>/<first>/<last>/<identifier>.
  class Library
    attr_reader :path

    # path - root directory of the library.
    def initialize(path)
      @path = path
    end

    # Lower-cased first two characters of the identifier (fewer if it is shorter).
    def first(identifier)
      identifier[0..1].downcase
    end

    # Lower-cased last two characters of the identifier (fewer if it is shorter).
    def last(identifier)
      # identifier[-2..-1] is nil when the identifier has fewer than two
      # characters; fall back to the whole identifier instead of crashing.
      (identifier[-2..-1] || identifier).downcase
    end

    # Path for the identifier inside the library; any extra parts are appended
    # as further path components.
    def path_for_identifier(identifier, *parts)
      File.join(path, first(identifier), last(identifier), identifier, *parts)
    end
  end
end
|
data/lib/archdown.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'archdown/library'
|
3
|
+
|
4
|
+
# Specs for Archdown::Library: construction and the two-character helpers
# used to bucket identifiers.
describe Archdown::Library do
  let(:library) do
    Archdown::Library.new("/tmp/library")
  end

  it "initializes" do
    expect { library }.not_to raise_error
  end

  it "gets first 2 chars" do
    prefix = library.first('happy')
    expect(prefix).to eq('ha')
  end

  it "gets last 2 chars" do
    suffix = library.last('happy')
    expect(suffix).to eq('py')
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
|
8
|
+
# Spec mixin for locating fixture files.
module Fixtures
  # Returns the path of the fixture called +name+ inside the `fixtures`
  # directory that sits next to this file.
  def fixture(name)
    spec_dir = File.dirname(__FILE__)
    File.join(spec_dir, 'fixtures', name)
  end
end
|
13
|
+
|
14
|
+
RSpec.configure do |config|
  # Filter the run to examples tagged :focus; with the setting below, the
  # whole suite still runs when the filter matches nothing.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  # Make the Fixtures helper methods available to the specs.
  config.include Fixtures

  # Randomised ordering surfaces order dependencies between specs. To debug
  # one, re-run with the seed printed after the run:
  #     --seed 1234
  config.order = 'random'
end
|
metadata
ADDED
@@ -0,0 +1,175 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: archdown
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.3'
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Duane Johnson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-07-14 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: archivist-client
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bundler
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '1.3'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '1.3'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: debugger
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: guard
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: guard-rspec
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
description: Archive.org download library with command-line tool
|
127
|
+
email:
|
128
|
+
- duane.johnson@gmail.com
|
129
|
+
executables:
|
130
|
+
- archdown
|
131
|
+
extensions: []
|
132
|
+
extra_rdoc_files: []
|
133
|
+
files:
|
134
|
+
- Gemfile
|
135
|
+
- Gemfile.lock
|
136
|
+
- Guardfile
|
137
|
+
- LICENSE
|
138
|
+
- README.md
|
139
|
+
- bin/archdown
|
140
|
+
- lib/archdown.rb
|
141
|
+
- lib/archdown/download.rb
|
142
|
+
- lib/archdown/librarian.rb
|
143
|
+
- lib/archdown/library.rb
|
144
|
+
- lib/archdown/version.rb
|
145
|
+
- spec/archdown/library_spec.rb
|
146
|
+
- spec/spec_helper.rb
|
147
|
+
homepage: ''
|
148
|
+
licenses:
|
149
|
+
- MIT
|
150
|
+
post_install_message:
|
151
|
+
rdoc_options: []
|
152
|
+
require_paths:
|
153
|
+
- lib
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
155
|
+
none: false
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ! '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
requirements: []
|
167
|
+
rubyforge_project:
|
168
|
+
rubygems_version: 1.8.23
|
169
|
+
signing_key:
|
170
|
+
specification_version: 3
|
171
|
+
summary: Archive.org download library
|
172
|
+
test_files:
|
173
|
+
- spec/archdown/library_spec.rb
|
174
|
+
- spec/spec_helper.rb
|
175
|
+
has_rdoc:
|