archdown 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# Pull the dependency list from this package's gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,107 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ archdown (0.3)
5
+ archivist-client
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ archivist-client (0.1.6)
11
+ faraday
12
+ faraday_middleware
13
+ representable
14
+ virtus (~> 1.0.0)
15
+ axiom-types (0.1.1)
16
+ descendants_tracker (~> 0.0.4)
17
+ ice_nine (~> 0.11.0)
18
+ thread_safe (~> 0.3, >= 0.3.1)
19
+ celluloid (0.15.2)
20
+ timers (~> 1.1.0)
21
+ coderay (1.1.0)
22
+ coercible (1.0.0)
23
+ descendants_tracker (~> 0.0.1)
24
+ columnize (0.8.9)
25
+ debugger (1.6.8)
26
+ columnize (>= 0.3.1)
27
+ debugger-linecache (~> 1.2.0)
28
+ debugger-ruby_core_source (~> 1.3.5)
29
+ debugger-linecache (1.2.0)
30
+ debugger-ruby_core_source (1.3.5)
31
+ descendants_tracker (0.0.4)
32
+ thread_safe (~> 0.3, >= 0.3.1)
33
+ diff-lcs (1.2.5)
34
+ equalizer (0.0.9)
35
+ faraday (0.9.0)
36
+ multipart-post (>= 1.2, < 3)
37
+ faraday_middleware (0.9.1)
38
+ faraday (>= 0.7.4, < 0.10)
39
+ ffi (1.9.3)
40
+ formatador (0.2.5)
41
+ guard (2.6.1)
42
+ formatador (>= 0.2.4)
43
+ listen (~> 2.7)
44
+ lumberjack (~> 1.0)
45
+ pry (>= 0.9.12)
46
+ thor (>= 0.18.1)
47
+ guard-rspec (4.2.10)
48
+ guard (~> 2.1)
49
+ rspec (>= 2.14, < 4.0)
50
+ ice_nine (0.11.0)
51
+ listen (2.7.9)
52
+ celluloid (>= 0.15.2)
53
+ rb-fsevent (>= 0.9.3)
54
+ rb-inotify (>= 0.9)
55
+ lumberjack (1.0.9)
56
+ method_source (0.8.2)
57
+ mini_portile (0.6.0)
58
+ multi_json (1.10.1)
59
+ multipart-post (2.0.0)
60
+ nokogiri (1.6.2.1)
61
+ mini_portile (= 0.6.0)
62
+ pry (0.10.0)
63
+ coderay (~> 1.1.0)
64
+ method_source (~> 0.8.1)
65
+ slop (~> 3.4)
66
+ rake (10.3.2)
67
+ rb-fsevent (0.9.4)
68
+ rb-inotify (0.9.5)
69
+ ffi (>= 0.5.0)
70
+ representable (1.8.5)
71
+ multi_json
72
+ nokogiri
73
+ uber
74
+ rspec (3.0.0)
75
+ rspec-core (~> 3.0.0)
76
+ rspec-expectations (~> 3.0.0)
77
+ rspec-mocks (~> 3.0.0)
78
+ rspec-core (3.0.2)
79
+ rspec-support (~> 3.0.0)
80
+ rspec-expectations (3.0.2)
81
+ diff-lcs (>= 1.2.0, < 2.0)
82
+ rspec-support (~> 3.0.0)
83
+ rspec-mocks (3.0.2)
84
+ rspec-support (~> 3.0.0)
85
+ rspec-support (3.0.2)
86
+ slop (3.5.0)
87
+ thor (0.19.1)
88
+ thread_safe (0.3.4)
89
+ timers (1.1.0)
90
+ uber (0.0.7)
91
+ virtus (1.0.2)
92
+ axiom-types (~> 0.1)
93
+ coercible (~> 1.0)
94
+ descendants_tracker (~> 0.0.3)
95
+ equalizer (~> 0.0.9)
96
+
97
+ PLATFORMS
98
+ ruby
99
+
100
+ DEPENDENCIES
101
+ archdown!
102
+ bundler (~> 1.3)
103
+ debugger
104
+ guard
105
+ guard-rspec
106
+ rake
107
+ rspec
data/Guardfile ADDED
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
# Re-run specs through guard-rspec when spec or library files change.
guard :rspec do
  # A changed spec file maps to itself.
  watch(%r{^spec/.+_spec\.rb$})
  # Specs mirror lib/ directly under spec/ (lib/archdown/library.rb is covered
  # by spec/archdown/library_spec.rb), so map to "spec/...", not "spec/lib/...".
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
  # A change to the shared helper maps to the whole spec directory.
  watch('spec/spec_helper.rb') { "spec" }
end
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 WordTree Foundation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,16 @@
1
+ archdown
2
+ ========
3
+
4
+ Command-line utility to download books from archive.org using archivist-client
5
+
6
+ Example Usage
7
+ -------------
8
+
9
+ ```sh
10
+ $ bundle install
11
+ $ bundle exec ruby bin/archdown -y 1700-1830
12
+
13
+ # ...
14
+ # downloads a lot of books into ./library
15
+ # ...
16
+ ```
data/bin/archdown ADDED
@@ -0,0 +1,43 @@
1
#!/usr/bin/env ruby

require "archdown"
require 'optparse'
require 'yaml' # metadata.to_yaml is used below; do not rely on archdown loading it

# Command-line front end: download books (text files) from archive.org.
#
# Usage: archdown [-l PATH] [-y YEAR|YEAR-YEAR]
# Any leftover positional argument prints the usage text instead of downloading.

# Defaults used when the matching option is not given.
options = {
  :library_root => './library',
  :start_year => 1700,
  :end_year => 1700
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: archdown [options]"

  opts.on("-l", "--library PATH", "Store files in PATH") do |lib|
    options[:library_root] = lib
  end

  opts.on("-y", "--year RANGE", "Query for books published within RANGE years, e.g. 1700-1750") do |range|
    # Accept only "YYYY" or "YYYY-YYYY"; String#to_i would silently turn
    # non-numeric text into 0.
    unless range =~ /\A\d+(-\d+)?\z/
      raise OptionParser::InvalidArgument, range
    end

    first_year, last_year = range.split('-')
    options[:start_year] = first_year.to_i
    # A single year is treated as a one-year range.
    options[:end_year] = (last_year || first_year).to_i
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Report bad options together with the usage text instead of a stack trace.
  warn e.message
  warn parser.to_s
  exit 1
end

if ARGV.empty?
  download = Archdown::Download.new(
    options[:library_root],
    :start_year => options[:start_year],
    :end_year => options[:end_year])
  # Echo each book's metadata as it is stored.
  download.go! do |metadata, librarian|
    puts metadata.to_yaml
  end
else
  # Unexpected positional arguments: show usage plus what was parsed.
  puts parser
  puts
  p options, ARGV
end
41
+
42
+
43
+
@@ -0,0 +1,26 @@
1
+ require 'archivist/client'
2
+
3
+ require 'archdown/librarian'
4
+ require 'archdown/library'
5
+
6
module Archdown
  # Drives a paged search and hands every result to a Librarian for storage.
  class Download
    attr_reader :library_root, :search_terms

    # library_root - path passed to Library.new; also exposed via #library_root.
    # search_terms - Hash of search parameters; a :page key is merged in per request.
    def initialize(library_root, search_terms)
      # Keep the root so the declared reader returns it instead of nil.
      @library_root = library_root
      @library = Library.new(library_root)
      @search_terms = search_terms
      @client = Archivist::Client::Base.new
    end

    # Requests result pages starting at page 1 and stores every book returned,
    # stopping at the first empty page.
    #
    # each_book - optional block forwarded to Librarian#store_book.
    #
    # Returns nil.
    def go!(&each_book)
      page = 1
      until (books = @client.search(@search_terms.merge(:page => page))).empty?
        books.each do |book|
          Librarian.new(@library, book).store_book(&each_book)
        end
        page += 1
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'fileutils'
2
+ require 'yaml'
3
+
4
+ require 'archivist/client'
5
+
6
module Archdown
  # The Librarian takes a book and puts it in the library
  class Librarian
    attr_reader :library, :book

    # library - object responding to #path_for_identifier(identifier).
    # book    - object responding to #title, #creators, #date, #identifier
    #           and #download.
    def initialize(library, book)
      @library = library
      @book = book
      @failure = nil
    end

    # Builds a fresh metadata Hash (string keys) for the book. A 'failure'
    # entry is present only after a download error has been recorded.
    def metadata
      creators = @book.creators
      date = @book.date
      meta = {
        'title' => @book.title,
        'author' => creators ? creators.join(';') : nil,
        # Guard a missing date the same way a missing creator list is guarded.
        'year' => date ? date.year : nil,
        'source' => "http://archive.org/details/#{@book.identifier}",
        'status' => "OCR ONLY",
        'archive_org_id' => @book.identifier,
      }
      meta['failure'] = @failure if @failure
      meta
    end

    # Directory inside the library that holds this book.
    def book_dir
      @library.path_for_identifier(@book.identifier)
    end

    # Full path of the "<identifier>.md" file inside #book_dir.
    def book_filepath
      File.join(book_dir, @book.identifier + '.md')
    end

    # Creates #book_dir, including any missing parent directories.
    def make_book_dir
      FileUtils.mkdir_p(book_dir)
    end

    # Downloads the book and writes "<YAML metadata>---\n<text>" to
    # #book_filepath. A failed download is not fatal: the error message is
    # recorded under 'failure' in the metadata and the file is written
    # without any text.
    #
    # Yields the metadata Hash and this Librarian when a block is given.
    def store_book(&block)
      make_book_dir

      text = nil
      begin
        text = @book.download
      rescue Archivist::Model::Document::UnsupportedFormat, StandardError => e
        # Both error kinds are handled identically: remember why it failed.
        @failure = e.to_s
      end

      yield metadata, self if block_given?

      content = metadata.to_yaml
      content << "---\n"
      content << text if text

      File.open(book_filepath, "w") do |file|
        file.write content
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Archdown
  # The Library knows about its locations for books (e.g. file directory)
  class Library
    attr_reader :path

    # path - root directory of the library.
    def initialize(path)
      @path = path
    end

    # Lower-cased first two characters of the identifier (fewer if it is shorter).
    def first(identifier)
      identifier[0..1].downcase
    end

    # Lower-cased last two characters of the identifier. Identifiers shorter
    # than two characters are used whole: identifier[-2..-1] is nil for them
    # and would otherwise raise NoMethodError.
    def last(identifier)
      (identifier[-2..-1] || identifier).downcase
    end

    # Joins the library root, the first/last shards, the identifier and any
    # extra path parts into one path:
    #   <path>/<first>/<last>/<identifier>[/<parts>...]
    def path_for_identifier(identifier, *parts)
      File.join(path, first(identifier), last(identifier), identifier, *parts)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Archdown
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = '0.3'.freeze
end
data/lib/archdown.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'archdown/library'
2
+ require 'archdown/librarian'
3
+ require 'archdown/download'
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ require 'archdown/library'
3
+
4
# Specs for the identifier helpers on Archdown::Library.
describe Archdown::Library do
  subject(:library) { described_class.new("/tmp/library") }

  it "initializes" do
    expect { library }.not_to raise_error
  end

  it "gets first 2 chars" do
    prefix = library.first('happy')
    expect(prefix).to eq('ha')
  end

  it "gets last 2 chars" do
    suffix = library.last('happy')
    expect(suffix).to eq('py')
  end
end
@@ -0,0 +1,25 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+
8
# Helper for locating spec fixture files.
module Fixtures
  # Returns the path of +name+ inside the "fixtures" directory that sits next
  # to this file.
  def fixture(name)
    here = File.dirname(__FILE__)
    File.join(here, 'fixtures', name)
  end
end
13
+
14
RSpec.configure do |rspec|
  # Make the fixture helper available in every example group.
  rspec.include Fixtures

  # Filter the run on the :focus tag; when the filter leaves nothing to run,
  # run the whole suite instead.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Randomise spec order to surface order dependencies. To reproduce a
  # failing order, re-run with the seed printed after the run:
  #     --seed 1234
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,175 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: archdown
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.3'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Duane Johnson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: archivist-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: debugger
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: guard
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: guard-rspec
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: Archive.org download library with command-line tool
127
+ email:
128
+ - duane.johnson@gmail.com
129
+ executables:
130
+ - archdown
131
+ extensions: []
132
+ extra_rdoc_files: []
133
+ files:
134
+ - Gemfile
135
+ - Gemfile.lock
136
+ - Guardfile
137
+ - LICENSE
138
+ - README.md
139
+ - bin/archdown
140
+ - lib/archdown.rb
141
+ - lib/archdown/download.rb
142
+ - lib/archdown/librarian.rb
143
+ - lib/archdown/library.rb
144
+ - lib/archdown/version.rb
145
+ - spec/archdown/library_spec.rb
146
+ - spec/spec_helper.rb
147
+ homepage: ''
148
+ licenses:
149
+ - MIT
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 1.8.23
169
+ signing_key:
170
+ specification_version: 3
171
+ summary: Archive.org download library
172
+ test_files:
173
+ - spec/archdown/library_spec.rb
174
+ - spec/spec_helper.rb
175
+ has_rdoc: