archdown 0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,107 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ archdown (0.3)
5
+ archivist-client
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ archivist-client (0.1.6)
11
+ faraday
12
+ faraday_middleware
13
+ representable
14
+ virtus (~> 1.0.0)
15
+ axiom-types (0.1.1)
16
+ descendants_tracker (~> 0.0.4)
17
+ ice_nine (~> 0.11.0)
18
+ thread_safe (~> 0.3, >= 0.3.1)
19
+ celluloid (0.15.2)
20
+ timers (~> 1.1.0)
21
+ coderay (1.1.0)
22
+ coercible (1.0.0)
23
+ descendants_tracker (~> 0.0.1)
24
+ columnize (0.8.9)
25
+ debugger (1.6.8)
26
+ columnize (>= 0.3.1)
27
+ debugger-linecache (~> 1.2.0)
28
+ debugger-ruby_core_source (~> 1.3.5)
29
+ debugger-linecache (1.2.0)
30
+ debugger-ruby_core_source (1.3.5)
31
+ descendants_tracker (0.0.4)
32
+ thread_safe (~> 0.3, >= 0.3.1)
33
+ diff-lcs (1.2.5)
34
+ equalizer (0.0.9)
35
+ faraday (0.9.0)
36
+ multipart-post (>= 1.2, < 3)
37
+ faraday_middleware (0.9.1)
38
+ faraday (>= 0.7.4, < 0.10)
39
+ ffi (1.9.3)
40
+ formatador (0.2.5)
41
+ guard (2.6.1)
42
+ formatador (>= 0.2.4)
43
+ listen (~> 2.7)
44
+ lumberjack (~> 1.0)
45
+ pry (>= 0.9.12)
46
+ thor (>= 0.18.1)
47
+ guard-rspec (4.2.10)
48
+ guard (~> 2.1)
49
+ rspec (>= 2.14, < 4.0)
50
+ ice_nine (0.11.0)
51
+ listen (2.7.9)
52
+ celluloid (>= 0.15.2)
53
+ rb-fsevent (>= 0.9.3)
54
+ rb-inotify (>= 0.9)
55
+ lumberjack (1.0.9)
56
+ method_source (0.8.2)
57
+ mini_portile (0.6.0)
58
+ multi_json (1.10.1)
59
+ multipart-post (2.0.0)
60
+ nokogiri (1.6.2.1)
61
+ mini_portile (= 0.6.0)
62
+ pry (0.10.0)
63
+ coderay (~> 1.1.0)
64
+ method_source (~> 0.8.1)
65
+ slop (~> 3.4)
66
+ rake (10.3.2)
67
+ rb-fsevent (0.9.4)
68
+ rb-inotify (0.9.5)
69
+ ffi (>= 0.5.0)
70
+ representable (1.8.5)
71
+ multi_json
72
+ nokogiri
73
+ uber
74
+ rspec (3.0.0)
75
+ rspec-core (~> 3.0.0)
76
+ rspec-expectations (~> 3.0.0)
77
+ rspec-mocks (~> 3.0.0)
78
+ rspec-core (3.0.2)
79
+ rspec-support (~> 3.0.0)
80
+ rspec-expectations (3.0.2)
81
+ diff-lcs (>= 1.2.0, < 2.0)
82
+ rspec-support (~> 3.0.0)
83
+ rspec-mocks (3.0.2)
84
+ rspec-support (~> 3.0.0)
85
+ rspec-support (3.0.2)
86
+ slop (3.5.0)
87
+ thor (0.19.1)
88
+ thread_safe (0.3.4)
89
+ timers (1.1.0)
90
+ uber (0.0.7)
91
+ virtus (1.0.2)
92
+ axiom-types (~> 0.1)
93
+ coercible (~> 1.0)
94
+ descendants_tracker (~> 0.0.3)
95
+ equalizer (~> 0.0.9)
96
+
97
+ PLATFORMS
98
+ ruby
99
+
100
+ DEPENDENCIES
101
+ archdown!
102
+ bundler (~> 1.3)
103
+ debugger
104
+ guard
105
+ guard-rspec
106
+ rake
107
+ rspec
data/Guardfile ADDED
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
# Re-run specs automatically when a spec, a library file, or the spec helper
# changes.
guard :rspec do
  # A changed spec file re-runs itself.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed library file re-runs its matching spec. The spec tree mirrors
  # lib/ directly under spec/ (lib/archdown/library.rb is covered by
  # spec/archdown/library_spec.rb), so there is no intermediate "lib" folder.
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }

  # A changed helper re-runs the whole suite.
  watch('spec/spec_helper.rb') { "spec" }
end
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 WordTree Foundation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,16 @@
1
+ archdown
2
+ ========
3
+
4
+ Command-line utility to download books from archive.org using archivist-client
5
+
6
+ Example Usage
7
+ -------------
8
+
9
+ ```sh
10
+ $ bundle install
11
+ $ bundle exec ruby bin/archdown -y 1700-1830
12
+
13
+ # ...
14
+ # downloads a lot of books into ./library
15
+ # ...
16
+ ```
data/bin/archdown ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "archdown"
4
+ require 'optparse'
5
+
6
+ # download books (text files) from archive.org
7
+
8
# Defaults used when the corresponding switch is not given.
options = {
  :library_root => './library',
  :start_year => 1700,
  :end_year => 1700
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: archdown [options]"

  opts.on("-l", "--library PATH", "Store files in PATH") do |lib|
    options[:library_root] = lib
  end

  opts.on("-y", "--year RANGE", "Query for books published within RANGE years, e.g. 1700-1750") do |range|
    # Accept "YYYY" or "YYYY-YYYY". String#to_i would silently turn a typo
    # into year 0, so anything that is not made of digits is rejected.
    years = /\A(\d+)(?:-(\d+))?\z/.match(range)
    raise OptionParser::InvalidArgument, range unless years

    options[:start_year] = years[1].to_i
    # A single year means a one-year range.
    options[:end_year] = (years[2] || years[1]).to_i
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Report bad switches/arguments as a usage error instead of a backtrace.
  warn e.message
  warn parser.to_s
  exit 1
end

if ARGV.empty?
  download = Archdown::Download.new(
    options[:library_root],
    :start_year => options[:start_year],
    :end_year => options[:end_year]
  )

  # Print each book's metadata as it is processed.
  download.go! do |metadata, librarian|
    puts metadata.to_yaml
  end
else
  # Stray positional arguments are not supported: show the usage text and
  # what was parsed.
  puts parser
  puts
  p options, ARGV
end
41
+
42
+
43
+
@@ -0,0 +1,26 @@
1
+ require 'archivist/client'
2
+
3
+ require 'archdown/librarian'
4
+ require 'archdown/library'
5
+
6
module Archdown
  # Drives a bulk download: pages through the client's search results and
  # hands every returned book to a Librarian for storage in the library.
  class Download
    attr_reader :library_root, :search_terms

    # library_root - value handed to Library.new (the CLI passes a directory
    #                path here).
    # search_terms - Hash of search parameters forwarded to the client's
    #                #search; a :page key is merged in for every request.
    def initialize(library_root, search_terms)
      # Kept so the declared library_root reader returns the value it was
      # given instead of nil.
      @library_root = library_root
      @library = Library.new(library_root)
      @search_terms = search_terms
      @client = Archivist::Client::Base.new
    end

    # Requests result pages starting at page 1 and stops at the first empty
    # page. Each book is stored through a Librarian; the optional block is
    # forwarded to Librarian#store_book.
    def go!(&each_book)
      page = 1
      until (books = @client.search(@search_terms.merge(:page => page))).empty?
        books.each do |book|
          Librarian.new(@library, book).store_book(&each_book)
        end
        page += 1
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'fileutils'
2
+ require 'yaml'
3
+
4
+ require 'archivist/client'
5
+
6
module Archdown
  # The Librarian takes a book and puts it in the library
  class Librarian
    attr_reader :library, :book

    # library - object answering #path_for_identifier(identifier).
    # book    - object answering #title, #creators, #date, #identifier and
    #           #download.
    def initialize(library, book)
      @library = library
      @book = book
      @failure = nil
    end

    # Builds the metadata Hash written at the top of the book file. A
    # 'failure' entry is present only after a download attempt has failed.
    def metadata
      meta = {
        'title' => @book.title,
        'author' => @book.creators ? @book.creators.join(';') : nil,
        # Guarded like 'author': a book without a date yields a nil year
        # instead of raising.
        'year' => @book.date ? @book.date.year : nil,
        'source' => "http://archive.org/details/#{@book.identifier}",
        'status' => "OCR ONLY",
        'archive_org_id' => @book.identifier
      }
      meta['failure'] = @failure if @failure
      meta
    end

    # Directory in which this book is stored.
    def book_dir
      @library.path_for_identifier(@book.identifier)
    end

    # Full path of the file holding the metadata header and the book text.
    def book_filepath
      File.join(book_dir, "#{@book.identifier}.md")
    end

    # Creates the book directory, including any missing parent directories.
    def make_book_dir
      FileUtils.mkdir_p(book_dir)
    end

    # Downloads the book and writes it to book_filepath as a YAML metadata
    # header, a "---" separator line, and the text (omitted when the
    # download failed). If a block is given it is called with the metadata
    # Hash and this librarian before the file is written.
    def store_book(&block)
      make_book_dir
      text = download_text

      yield metadata, self if block_given?

      content = metadata.to_yaml
      content << "---\n"
      content << text if text

      File.open(book_filepath, "w") do |file|
        file.write content
      end
    end

    private

    # Returns the downloaded text. On failure the error message is recorded
    # in @failure and nil is returned, so the metadata file is still written.
    def download_text
      @book.download
    rescue Archivist::Model::Document::UnsupportedFormat, StandardError => e
      @failure = e.to_s
      nil
    end
  end
end
@@ -0,0 +1,22 @@
1
module Archdown
  # The Library knows about its locations for books (e.g. file directory)
  class Library
    attr_reader :path

    # path - root directory under which all book directories are built.
    def initialize(path)
      @path = path
    end

    # Returns the first two characters of identifier, lower-cased (the whole
    # identifier when it is shorter than two characters).
    def first(identifier)
      identifier[0, 2].downcase
    end

    # Returns the last two characters of identifier, lower-cased. Slicing
    # from -2 yields nil for identifiers shorter than two characters, so the
    # whole identifier is used in that case instead of raising.
    def last(identifier)
      (identifier[-2, 2] || identifier).downcase
    end

    # Builds <path>/<first two chars>/<last two chars>/<identifier>, followed
    # by any extra path parts.
    def path_for_identifier(identifier, *parts)
      File.join(path, first(identifier), last(identifier), identifier, *parts)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Archdown
  # Gem version string. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = '0.3'.freeze
end
data/lib/archdown.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'archdown/library'
2
+ require 'archdown/librarian'
3
+ require 'archdown/download'
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ require 'archdown/library'
3
+
4
# Covers construction and the two-character prefix/suffix helpers of
# Archdown::Library.
describe Archdown::Library do
  subject(:library) { described_class.new("/tmp/library") }

  it "initializes" do
    expect { library }.not_to raise_error
  end

  it "gets first 2 chars" do
    prefix = library.first('happy')
    expect(prefix).to eq('ha')
  end

  it "gets last 2 chars" do
    suffix = library.last('happy')
    expect(suffix).to eq('py')
  end
end
@@ -0,0 +1,25 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+
8
# Spec helper mixin for locating fixture files.
module Fixtures
  # Returns the path of the fixture called name inside the 'fixtures'
  # directory that sits next to this file.
  def fixture(name)
    fixtures_dir = File.join(File.dirname(__FILE__), 'fixtures')
    File.join(fixtures_dir, name)
  end
end
13
+
14
RSpec.configure do |rspec|
  # Make the fixture helper available inside every example group.
  rspec.include Fixtures

  # Restrict the run to examples tagged :focus; when the filter leaves
  # nothing to run, run the whole suite instead.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Randomize example order to expose order dependencies. To reproduce a
  # particular ordering while debugging, pass the seed printed after the run:
  #     --seed 1234
  rspec.order = 'random'
end
metadata ADDED
@@ -0,0 +1,175 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: archdown
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.3'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Duane Johnson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: archivist-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: debugger
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: guard
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: guard-rspec
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: Archive.org download library with command-line tool
127
+ email:
128
+ - duane.johnson@gmail.com
129
+ executables:
130
+ - archdown
131
+ extensions: []
132
+ extra_rdoc_files: []
133
+ files:
134
+ - Gemfile
135
+ - Gemfile.lock
136
+ - Guardfile
137
+ - LICENSE
138
+ - README.md
139
+ - bin/archdown
140
+ - lib/archdown.rb
141
+ - lib/archdown/download.rb
142
+ - lib/archdown/librarian.rb
143
+ - lib/archdown/library.rb
144
+ - lib/archdown/version.rb
145
+ - spec/archdown/library_spec.rb
146
+ - spec/spec_helper.rb
147
+ homepage: ''
148
+ licenses:
149
+ - MIT
150
+ post_install_message:
151
+ rdoc_options: []
152
+ require_paths:
153
+ - lib
154
+ required_ruby_version: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 1.8.23
169
+ signing_key:
170
+ specification_version: 3
171
+ summary: Archive.org download library
172
+ test_files:
173
+ - spec/archdown/library_spec.rb
174
+ - spec/spec_helper.rb
175
+ has_rdoc: