wordtree 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -0
- data/lib/wordtree/book.rb +15 -5
- data/lib/wordtree/db/librarian.rb +68 -0
- data/lib/wordtree/disk/librarian.rb +62 -0
- data/lib/wordtree/disk/library.rb +43 -0
- data/lib/wordtree/disk/library_locator.rb +46 -0
- data/lib/wordtree/version.rb +1 -1
- data/lib/wordtree.rb +5 -3
- data/spec/spec_helper.rb +6 -0
- data/spec/wordtree/book_spec.rb +1 -1
- data/spec/wordtree/db/librarian_spec.rb +58 -0
- data/spec/wordtree/disk/librarian_spec.rb +60 -0
- data/spec/wordtree/{library_spec.rb → disk/library_spec.rb} +3 -3
- data/wordtree.gemspec +2 -0
- metadata +44 -9
- data/lib/wordtree/librarian.rb +0 -58
- data/lib/wordtree/library.rb +0 -41
- data/lib/wordtree/library_locator.rb +0 -44
- data/spec/wordtree/librarian_spec.rb +0 -48
data/README.md
CHANGED
data/lib/wordtree/book.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'virtus'
|
2
|
+
require 'simhash'
|
3
|
+
|
2
4
|
require 'wordtree/text_utils'
|
3
5
|
|
4
6
|
module WordTree
|
@@ -13,9 +15,9 @@ module WordTree
|
|
13
15
|
attribute :source, String
|
14
16
|
attribute :status, String
|
15
17
|
# Size of the content in bytes
|
16
|
-
attribute :size_bytes, Integer
|
18
|
+
attribute :size_bytes, Integer, :default => :content_size
|
17
19
|
# A simhash (locality-sensitive hash) of the content
|
18
|
-
attribute :simhash,
|
20
|
+
attribute :simhash, Integer, :default => :calculate_simhash
|
19
21
|
|
20
22
|
attribute :content, String
|
21
23
|
|
@@ -28,11 +30,19 @@ module WordTree
|
|
28
30
|
end
|
29
31
|
|
30
32
|
def metadata
|
31
|
-
attributes.select{ |k,v| !v.nil? && k != :content
|
33
|
+
attributes.select{ |k,v| !v.nil? && k != :content }
|
34
|
+
end
|
35
|
+
|
36
|
+
def content_clean(wrap=120)
|
37
|
+
TextUtils.clean_text(content, wrap)
|
38
|
+
end
|
39
|
+
|
40
|
+
def content_size
|
41
|
+
content ? content.size : nil
|
32
42
|
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
44
|
+
def calculate_simhash
|
45
|
+
content ? content_clean.simhash(:split_by => /\s/) : nil
|
36
46
|
end
|
37
47
|
end
|
38
48
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rethinkdb'
|
2
|
+
require 'wordtree/book'
|
3
|
+
|
4
|
+
module WordTree
  module DB
    # Saves Book metadata to, and queries it from, the 'books' table of a
    # RethinkDB database.
    class Librarian
      # rethinkdb_connection - the connection every query is run against
      def initialize(rethinkdb_connection)
        @rdb = rethinkdb_connection
        @r = RethinkDB::RQL.new
      end

      # Look up a single book by its id.
      #
      # Returns a Book, or nil when the query yields no row.
      def find(book_id)
        result = @r.table('books').get(book_id).run(@rdb)
        result ? Book.new(result) : nil
      end

      # Insert book.metadata into the 'books' table. The :upsert option is
      # passed so a row that already has this id does not make the insert fail.
      #
      # Returns true when the server reports exactly one row as replaced,
      # inserted or unchanged; false otherwise.
      def save(book)
        result = @r.table('books').insert(book.metadata, :upsert => true).run(@rdb)
        %w[replaced inserted unchanged].any? { |outcome| result[outcome] == 1 }
      end

      # Search for books by metadata, ordered by the 'year' index.
      #
      # params   - Hash keyed by Symbol. :id, :title, :author, :source and
      #            :status are matched as case-insensitive patterns.
      #            :year and :size_bytes take a String that is either a single
      #            integer ("1800") or a "low,high" inclusive range ("1800,1810").
      # page     - 1-based page number
      # per_page - number of results per page
      #
      # Returns an Array of Book, or nil when there are no results.
      def search(params, page=1, per_page=20)
        conditions = match_list(params,
          [:id, :title, :author, :source, :status],
          [:year, :size_bytes])
        cursor = @r.table('books').
          order_by(:index => 'year').
          filter(&conditions).
          skip((page-1)*per_page).
          limit(per_page).
          run(@rdb)
        results = cursor.to_a
        results.empty? ? nil : results.map { |result| Book.new(result) }
      end

      protected

      # Create a condition Proc suitable for RethinkDB search queries.
      #
      # params       - Hash of search criteria (see #search)
      # string_keys  - keys of params to match as patterns against the record
      # numeric_keys - keys of params to compare as integers, either by
      #                equality or as a "low,high" inclusive range
      # escape       - when true, string criteria are passed through
      #                Regexp.escape so they match literally
      #
      # The Proc takes a record and returns all the individual conditions
      # combined with `&`, or nil when params contains none of the given keys.
      def match_list(params, string_keys=[], numeric_keys=[], escape=true)
        Proc.new do |record|
          string_conditions = string_keys.map do |key|
            if params[key]
              term = escape ? Regexp.escape(params[key]) : params[key]
              record[key.to_s].match("(?i)#{term}")
            end
          end

          numeric_conditions = numeric_keys.map do |key|
            if params[key]
              if params[key].include?(',')
                low, high = params[key].split(',', 2).map { |v| v.to_i }
                (record[key.to_s] >= low) & (record[key.to_s] <= high)
              else
                record[key.to_s].eq(params[key].to_i)
              end
            end
          end

          # Array has no #foldl in Ruby; #reduce is the left fold. Keys that
          # were absent from params produced nil above and are dropped first.
          (string_conditions + numeric_conditions).compact.reduce { |a, b| a & b }
        end
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'preamble'
|
2
|
+
require 'wordtree/book'
|
3
|
+
require 'wordtree/disk/library'
|
4
|
+
require 'wordtree/archdown'
|
5
|
+
|
6
|
+
module WordTree
  module Disk
    # Reads and writes Book files in an on-disk Library, and can populate
    # the library with books downloaded from archive.org.
    class Librarian
      attr_reader :library

      # library - the library object used to resolve and create book paths
      def initialize(library)
        @library = library
      end

      # Load a book from its file in the library.
      #
      # Returns a Book, or nil when loading raises Errno::ENOENT.
      def find(book_id)
        on_disk = Preamble.load(library.path_to(book_id))
        Book.create(book_id, on_disk.metadata, on_disk.content)
      rescue Errno::ENOENT
        nil
      end

      # Create the book's directory, then write its metadata and content
      # (an empty string when the book has no content) to its file.
      def save(book)
        library.mkdir(book.id)
        preamble = Preamble.new(book.metadata, book.content || "")
        preamble.save(library.path_to(book.id))
      end

      # Download each of the given archive.org identifiers, one request per id.
      # An optional block is passed through to #archive_org_get_with_conditions.
      #
      # Returns a flat list of the book ids that were saved.
      def archive_org_get(*book_ids, &block)
        saved_per_id = book_ids.map do |book_id|
          archive_org_get_with_conditions({:identifier => book_id}, &block)
        end
        saved_per_id.flatten(1)
      end

      # Download using :start_year and :end_year as the download conditions.
      def archive_org_get_range_of_years(start_year, end_year, &block)
        year_range = {
          :start_year => start_year,
          :end_year => end_year
        }
        archive_org_get_with_conditions(year_range, &block)
      end

      # Downloads a set of books to the on-disk library and
      # returns a list of book_ids.
      #
      # Failed downloads are reported on $stderr and skipped. When a block is
      # given it is yielded each saved book together with this librarian.
      def archive_org_get_with_conditions(conditions, &block)
        archive_org_ids = []
        Archdown.new.download_all(conditions) do |metadata, content, failure|
          if failure
            #TODO: logging
            $stderr.puts "Unable to download from archive.org: #{failure}"
            next
          end

          book = Book.create(metadata["archive_org_id"], metadata, content)
          save(book)
          yield book, self if block_given?
          archive_org_ids << book.id
        end
        archive_org_ids
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
require 'wordtree/archdown'
|
4
|
+
require 'wordtree/disk/library_locator'
|
5
|
+
|
6
|
+
module WordTree
  module Disk
    # Maps book IDs to directories and file paths beneath a library root.
    class Library

      # File name pattern for each file type; "%s" is filled with the book ID
      FILE_TYPES = {
        :raw => "%s.md"
      }

      # The file path to the root of the library directory, e.g. /data/library
      attr_reader :root

      def initialize(root)
        @root = root
      end

      # Returns the full path of a book's subdirectory within the library.
      # Accepts either a String or a LibraryLocator object.
      def dir_of(book_id)
        locator = LibraryLocator.identity(book_id)
        File.expand_path(locator.relpath, root)
      end

      # Returns the full path of the book's file of the given type.
      def path_to(book_id, type=:raw)
        directory = dir_of(book_id)
        File.join(directory, file_type(book_id, type))
      end

      # Returns the file name (no directory) for the book's file of the given
      # type, built from the matching FILE_TYPES pattern.
      def file_type(book_id, type=:raw)
        book = LibraryLocator.identity(book_id)
        pattern = FILE_TYPES[type]
        pattern % book.id
      end

      # Create all subdirs up to the location where a book is stored.
      # Accepts either a String or a LibraryLocator object.
      def mkdir(book_id)
        target = dir_of(book_id)
        FileUtils.mkdir_p(target)
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module WordTree
  module Disk
    # A class that converts from a book ID to a location within the library, e.g.
    #
    #   "firstbooknapole00gruagoog"
    #
    # becomes
    #
    #   "fi/og/firstbooknapole00gruagoog/"
    #
    # or, in context of the full path:
    #
    #   [/data/library/] "fi/og/firstbooknapole00gruagoog/" [firstbooknapole00gruagoog.md]
    #
    class LibraryLocator
      # The book ID to locate
      attr_reader :id

      # Construct a LibraryLocator from a string (book ID)
      def initialize(id)
        @id = id
      end

      # The first two characters of the ID, lowercased (the whole ID when it
      # is shorter than two characters).
      def first
        @id[0..1].downcase
      end

      # The last two characters of the ID, lowercased.
      # The [-2..-1] slice is nil for an ID shorter than two characters, so
      # fall back to the whole ID instead of calling downcase on nil.
      def last
        (@id[-2..-1] || @id).downcase
      end

      # Returns a "relative" path to be joined to the library root,
      # e.g. if the identifier is "firstbooknapole00gruagoog", then relpath
      # should return "fi/og/firstbooknapole00gruagoog", i.e. probably later to
      # become something like "/data/library/fi/og/firstbooknapole00gruagoog"
      def relpath
        File.join(first, last, @id)
      end

      # Constructor that is as willing to use a String as it is a LibraryLocator:
      # a LibraryLocator is returned as-is, anything else is wrapped in a new one.
      def self.identity(id)
        id.is_a?(LibraryLocator) ? id : new(id)
      end
    end
  end
end
|
data/lib/wordtree/version.rb
CHANGED
data/lib/wordtree.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require "wordtree/version"
|
2
2
|
require "wordtree/book"
|
3
3
|
|
4
|
-
require "wordtree/
|
5
|
-
|
6
|
-
require "wordtree/
|
4
|
+
require "wordtree/db/librarian"
|
5
|
+
|
6
|
+
require "wordtree/disk/library"
|
7
|
+
require "wordtree/disk/librarian"
|
8
|
+
require "wordtree/disk/library_locator"
|
data/spec/spec_helper.rb
CHANGED
data/spec/wordtree/book_spec.rb
CHANGED
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'preamble'
|
4
|
+
require 'wordtree/db/librarian'
|
5
|
+
|
6
|
+
describe WordTree::DB::Librarian do
  it "instantiates" do
    WordTree::DB::Librarian.new("connection")
  end

  context "with rethinkdb connection" do
    let(:r) { RethinkDB::RQL.new }
    let(:rdb) do
      r.connect(
        :host => RDB_CONFIG[:host],
        :port => RDB_CONFIG[:port],
        :db => RDB_CONFIG[:db]
      )
    end

    let(:librarian) { WordTree::DB::Librarian.new(rdb) }

    # Give every example a freshly created 'books' table
    before(:each) do
      begin
        r.table_drop('books').run(rdb)
      rescue RethinkDB::RqlRuntimeError
        # presumably the table did not exist yet; it is (re)created below
      ensure
        r.table_create('books').run(rdb)
      end
    end

    describe "#find" do
      it "returns nil if book doesn't exist" do
        book = librarian.find("abc")
        expect(book).to be_nil
      end

      it "finds a book" do
        row = {:id => "firstbooknapole00gruagoog", :year => 1809}
        r.table('books').insert(row).run(rdb)

        book = librarian.find("firstbooknapole00gruagoog")
        expect(book).to be_a(WordTree::Book)
      end
    end

    describe "#save" do
      it "saves a book to the db" do
        book = WordTree::Book.create('test', {:year => 1800}, "body")
        librarian.save(book)

        stored_rows = r.table('books').run(rdb).to_a
        expected_row = {
          "id" => "test",
          "simhash" => 1318950168412674304,
          "size_bytes" => 4,
          "year" => 1800
        }
        expect(stored_rows).to eq([expected_row])
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'preamble'
|
4
|
+
require 'wordtree/disk/librarian'
|
5
|
+
|
6
|
+
describe WordTree::Disk::Librarian do
  let(:root) { Dir.mktmpdir }
  let(:library) { WordTree::Disk::Library.new(root) }
  let(:librarian) { WordTree::Disk::Librarian.new(library) }

  it "downloads an archive.org book" do
    VCR.use_cassette('archive_org_download_book') do
      librarian.archive_org_get("firstbooknapole00gruagoog")
      downloaded = librarian.find("firstbooknapole00gruagoog")
      expect(downloaded.year).to eq(1809)
    end
  end

  context "with fixture library" do
    # Need a read-only library with fixtures in it for some tests
    let(:root) { fixture("library") }

    describe "#find" do
      it "returns nil if the book is not found" do
        expect(librarian.find("nobook")).to be_nil
      end

      it "loads book from disk" do
        book = librarian.find("book")
        expect(book.id).to eq("book")
        expect(book.year).to eq(1800)
        expect(book.content).to eq("Book with content")
      end
    end

    it "saves to disk (yaml, content)" do
      # Save into a separate temporary library so the fixture stays untouched
      tmp_library = WordTree::Disk::Library.new(Dir.mktmpdir)
      tmp_librarian = WordTree::Disk::Librarian.new(tmp_library)

      book = librarian.find("book")
      book.source = "test"
      book.content += "."
      tmp_librarian.save(book)

      updated = Preamble.load(tmp_library.path_to("book"))
      expected_metadata = {
        :id => "book",
        :year => 1800,
        :source => "test",
        :simhash => 14921967289891934128,
        :size_bytes => 17
      }
      expect(updated.metadata).to eq(expected_metadata)
      expect(updated.content).to eq("Book with content.")
    end

  end
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'wordtree/library'
|
2
|
+
require 'wordtree/disk/library'
|
3
3
|
require 'tmpdir'
|
4
4
|
|
5
|
-
describe WordTree::Library do
|
5
|
+
describe WordTree::Disk::Library do
|
6
6
|
let(:root) { Dir.mktmpdir('library') }
|
7
|
-
let(:library) { WordTree::Library.new(root) }
|
7
|
+
let(:library) { WordTree::Disk::Library.new(root) }
|
8
8
|
|
9
9
|
it "initializes with path" do
|
10
10
|
expect(library.root).to eq(root)
|
data/wordtree.gemspec
CHANGED
@@ -22,6 +22,8 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "preamble", ">= 0.0.3"
|
23
23
|
spec.add_dependency "archivist-client", ">= 0.1.7"
|
24
24
|
spec.add_dependency "retriable"
|
25
|
+
spec.add_dependency "simhash", "0.2.5"
|
26
|
+
spec.add_dependency "rethinkdb", "~> 1.13.0"
|
25
27
|
|
26
28
|
spec.add_development_dependency "bundler", "~> 1.3"
|
27
29
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordtree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: virtus
|
@@ -75,6 +75,38 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: simhash
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - '='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 0.2.5
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - '='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 0.2.5
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: rethinkdb
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.13.0
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.13.0
|
78
110
|
- !ruby/object:Gem::Dependency
|
79
111
|
name: bundler
|
80
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,17 +252,19 @@ files:
|
|
220
252
|
- lib/wordtree.rb
|
221
253
|
- lib/wordtree/archdown.rb
|
222
254
|
- lib/wordtree/book.rb
|
223
|
-
- lib/wordtree/librarian.rb
|
224
|
-
- lib/wordtree/
|
225
|
-
- lib/wordtree/
|
255
|
+
- lib/wordtree/db/librarian.rb
|
256
|
+
- lib/wordtree/disk/librarian.rb
|
257
|
+
- lib/wordtree/disk/library.rb
|
258
|
+
- lib/wordtree/disk/library_locator.rb
|
226
259
|
- lib/wordtree/text_utils.rb
|
227
260
|
- lib/wordtree/version.rb
|
228
261
|
- spec/fixtures/cassettes/archive_org_download_book.yml
|
229
262
|
- spec/fixtures/library/bo/ok/book/book.md
|
230
263
|
- spec/spec_helper.rb
|
231
264
|
- spec/wordtree/book_spec.rb
|
232
|
-
- spec/wordtree/librarian_spec.rb
|
233
|
-
- spec/wordtree/
|
265
|
+
- spec/wordtree/db/librarian_spec.rb
|
266
|
+
- spec/wordtree/disk/librarian_spec.rb
|
267
|
+
- spec/wordtree/disk/library_spec.rb
|
234
268
|
- spec/wordtree/text_utils_spec.rb
|
235
269
|
- wordtree.gemspec
|
236
270
|
homepage: ''
|
@@ -263,7 +297,8 @@ test_files:
|
|
263
297
|
- spec/fixtures/library/bo/ok/book/book.md
|
264
298
|
- spec/spec_helper.rb
|
265
299
|
- spec/wordtree/book_spec.rb
|
266
|
-
- spec/wordtree/librarian_spec.rb
|
267
|
-
- spec/wordtree/
|
300
|
+
- spec/wordtree/db/librarian_spec.rb
|
301
|
+
- spec/wordtree/disk/librarian_spec.rb
|
302
|
+
- spec/wordtree/disk/library_spec.rb
|
268
303
|
- spec/wordtree/text_utils_spec.rb
|
269
304
|
has_rdoc:
|
data/lib/wordtree/librarian.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'preamble'
|
2
|
-
require 'wordtree/book'
|
3
|
-
require 'wordtree/library'
|
4
|
-
require 'wordtree/archdown'
|
5
|
-
|
6
|
-
module WordTree
|
7
|
-
class Librarian
|
8
|
-
attr_reader :library
|
9
|
-
|
10
|
-
def initialize(library)
|
11
|
-
@library = library
|
12
|
-
end
|
13
|
-
|
14
|
-
def find(book_id)
|
15
|
-
retrieved = Preamble.load(library.path_to(book_id))
|
16
|
-
Book.create(book_id, retrieved.metadata, retrieved.content)
|
17
|
-
end
|
18
|
-
|
19
|
-
def save(book)
|
20
|
-
library.mkdir(book.id)
|
21
|
-
Preamble.new(book.metadata, book.content).save(library.path_to(book.id))
|
22
|
-
end
|
23
|
-
|
24
|
-
def archive_org_get(*book_ids, &block)
|
25
|
-
book_ids.map do |book_id|
|
26
|
-
archive_org_get_with_conditions(identifier: book_id, &block)
|
27
|
-
end.flatten(1)
|
28
|
-
end
|
29
|
-
|
30
|
-
def archive_org_get_range_of_years(start_year, end_year, &block)
|
31
|
-
archive_org_get_with_conditions({
|
32
|
-
:start_year => start_year,
|
33
|
-
:end_year => end_year
|
34
|
-
}, &block)
|
35
|
-
end
|
36
|
-
|
37
|
-
# Downloads a set of books to the on-disk library and
|
38
|
-
# returns a list of book_ids
|
39
|
-
def archive_org_get_with_conditions(conditions, &block)
|
40
|
-
archdown = Archdown.new
|
41
|
-
[].tap do |archive_org_ids|
|
42
|
-
archdown.download_all(conditions) do |metadata, content, failure|
|
43
|
-
if failure
|
44
|
-
#TODO: logging
|
45
|
-
$stderr.puts "Unable to download from archive.org: #{failure}"
|
46
|
-
else
|
47
|
-
book = Book.create(metadata["archive_org_id"], metadata, content)
|
48
|
-
save(book)
|
49
|
-
yield book, self if block_given?
|
50
|
-
archive_org_ids << book.id
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
end
|
data/lib/wordtree/library.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
|
3
|
-
require 'wordtree/archdown'
|
4
|
-
require 'wordtree/library_locator'
|
5
|
-
|
6
|
-
module WordTree
|
7
|
-
class Library
|
8
|
-
|
9
|
-
FILE_TYPES = {
|
10
|
-
:raw => "%s.md"
|
11
|
-
}
|
12
|
-
|
13
|
-
# The file path to the root of the library directory, e.g. /data/library
|
14
|
-
attr_reader :root
|
15
|
-
|
16
|
-
def initialize(root)
|
17
|
-
@root = root
|
18
|
-
end
|
19
|
-
|
20
|
-
# returns the full path of a book's subdirectory within the library
|
21
|
-
# Accepts either a String or a LibraryLocator object
|
22
|
-
def dir_of(book_id)
|
23
|
-
File.expand_path(LibraryLocator.identity(book_id).relpath, root)
|
24
|
-
end
|
25
|
-
|
26
|
-
def path_to(book_id, type=:raw)
|
27
|
-
File.join(dir_of(book_id), file_type(book_id, type))
|
28
|
-
end
|
29
|
-
|
30
|
-
def file_type(book_id, type=:raw)
|
31
|
-
locator = LibraryLocator.identity(book_id)
|
32
|
-
FILE_TYPES[type] % locator.id
|
33
|
-
end
|
34
|
-
|
35
|
-
# Create all subdirs up to the location where a book is stored
|
36
|
-
# Accepts either a String or a LibraryLocator object
|
37
|
-
def mkdir(book_id)
|
38
|
-
FileUtils.mkdir_p(dir_of(book_id))
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
module WordTree
|
2
|
-
# A class that converts from a book ID to a location within the library, e.g.
|
3
|
-
#
|
4
|
-
# "firstbooknapole00gruagoog"
|
5
|
-
#
|
6
|
-
# becomes
|
7
|
-
#
|
8
|
-
# "fi/og/firstbooknapole00gruagoog/"
|
9
|
-
#
|
10
|
-
# or, in context of the full path:
|
11
|
-
#
|
12
|
-
# [/data/library/] "fi/og/firstbooknapole00gruagoog/" [firstbooknapole00gruagoog.md]
|
13
|
-
#
|
14
|
-
class LibraryLocator
|
15
|
-
# The book ID to locate
|
16
|
-
attr_reader :id
|
17
|
-
|
18
|
-
# Construct a LibraryLocator from a string (book ID)
|
19
|
-
def initialize(id)
|
20
|
-
@id = id
|
21
|
-
end
|
22
|
-
|
23
|
-
def first
|
24
|
-
@id[0..1].downcase
|
25
|
-
end
|
26
|
-
|
27
|
-
def last
|
28
|
-
@id[-2..-1].downcase
|
29
|
-
end
|
30
|
-
|
31
|
-
# Returns a "relative" path to be joined to the library root,
|
32
|
-
# e.g. if the identifier is "firstbooknapole00gruagoog", then relpath
|
33
|
-
# should return "fi/og/firstbooknapole00gruagoog", i.e. probably later to
|
34
|
-
# become something like "/data/library/fi/og/firstbooknapole00gruagoog"
|
35
|
-
def relpath
|
36
|
-
File.join(first, last, @id)
|
37
|
-
end
|
38
|
-
|
39
|
-
# Constructor that is as willing to use a String as it is a LibraryLocator
|
40
|
-
def self.identity(id)
|
41
|
-
id.is_a?(LibraryLocator) ? id : new(id)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
require 'tmpdir'
|
3
|
-
require 'preamble'
|
4
|
-
require 'wordtree/librarian'
|
5
|
-
|
6
|
-
describe WordTree::Librarian do
|
7
|
-
let(:root) { Dir.mktmpdir }
|
8
|
-
let(:library) { WordTree::Library.new(root) }
|
9
|
-
let(:librarian) { WordTree::Librarian.new(library) }
|
10
|
-
|
11
|
-
it "downloads an archive.org book" do
|
12
|
-
VCR.use_cassette('archive_org_download_book') do
|
13
|
-
librarian.archive_org_get("firstbooknapole00gruagoog")
|
14
|
-
book = librarian.find("firstbooknapole00gruagoog")
|
15
|
-
expect(book.year).to eq(1809)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context "with fixture library" do
|
20
|
-
# Need a read-only library with fixtures in it for some tests
|
21
|
-
let(:root) { fixture("library") }
|
22
|
-
|
23
|
-
it "loads book from disk" do
|
24
|
-
book = librarian.find("book")
|
25
|
-
expect(book.id).to eq("book")
|
26
|
-
expect(book.year).to eq(1800)
|
27
|
-
expect(book.content).to eq("Book with content")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "saves to disk (yaml, content)" do
|
31
|
-
tmp_root = Dir.mktmpdir
|
32
|
-
tmp_library = WordTree::Library.new(tmp_root)
|
33
|
-
tmp_librarian = WordTree::Librarian.new(tmp_library)
|
34
|
-
|
35
|
-
book = librarian.find("book")
|
36
|
-
|
37
|
-
book.source = "test"
|
38
|
-
book.content += "."
|
39
|
-
|
40
|
-
tmp_librarian.save(book)
|
41
|
-
|
42
|
-
updated = Preamble.load(tmp_library.path_to("book"))
|
43
|
-
expect(updated.metadata).to eq({:year => 1800, :source => "test"})
|
44
|
-
expect(updated.content).to eq("Book with content.")
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
end
|