wordtree 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -0
- data/lib/wordtree/book.rb +15 -5
- data/lib/wordtree/db/librarian.rb +68 -0
- data/lib/wordtree/disk/librarian.rb +62 -0
- data/lib/wordtree/disk/library.rb +43 -0
- data/lib/wordtree/disk/library_locator.rb +46 -0
- data/lib/wordtree/version.rb +1 -1
- data/lib/wordtree.rb +5 -3
- data/spec/spec_helper.rb +6 -0
- data/spec/wordtree/book_spec.rb +1 -1
- data/spec/wordtree/db/librarian_spec.rb +58 -0
- data/spec/wordtree/disk/librarian_spec.rb +60 -0
- data/spec/wordtree/{library_spec.rb → disk/library_spec.rb} +3 -3
- data/wordtree.gemspec +2 -0
- metadata +44 -9
- data/lib/wordtree/librarian.rb +0 -58
- data/lib/wordtree/library.rb +0 -41
- data/lib/wordtree/library_locator.rb +0 -44
- data/spec/wordtree/librarian_spec.rb +0 -48
data/README.md
CHANGED
data/lib/wordtree/book.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'virtus'
|
2
|
+
require 'simhash'
|
3
|
+
|
2
4
|
require 'wordtree/text_utils'
|
3
5
|
|
4
6
|
module WordTree
|
@@ -13,9 +15,9 @@ module WordTree
|
|
13
15
|
attribute :source, String
|
14
16
|
attribute :status, String
|
15
17
|
# Size of the content in bytes
|
16
|
-
attribute :size_bytes, Integer
|
18
|
+
attribute :size_bytes, Integer, :default => :content_size
|
17
19
|
# A simhash (locality-sensitive hash) of the content
|
18
|
-
attribute :simhash,
|
20
|
+
attribute :simhash, Integer, :default => :calculate_simhash
|
19
21
|
|
20
22
|
attribute :content, String
|
21
23
|
|
@@ -28,11 +30,19 @@ module WordTree
|
|
28
30
|
end
|
29
31
|
|
30
32
|
# Returns the entries of +attributes+ that describe the book: everything
# except the :content entry and any entry whose value is nil.
def metadata
  attributes.reject { |name, value| value.nil? || name == :content }
end
|
35
|
+
|
36
|
+
# Passes the book's content through TextUtils.clean_text.
#
# wrap - forwarded as the second argument of clean_text (120 by default).
def content_clean(wrap=120)
  raw_text = content
  TextUtils.clean_text(raw_text, wrap)
end
|
39
|
+
|
40
|
+
# Size of the content in bytes, or nil when there is no content.
#
# This is the default for the :size_bytes attribute, so it must count bytes:
# String#size counts characters and would under-report any multibyte text
# (e.g. UTF-8 accented letters), hence String#bytesize.
def content_size
  content ? content.bytesize : nil
end
|
33
43
|
|
34
|
-
def
|
35
|
-
|
44
|
+
# Default for the :simhash attribute: calls #simhash on the cleaned content,
# passing :split_by => /\s/. Returns nil when the book has no content.
def calculate_simhash
  return nil unless content
  content_clean.simhash(:split_by => /\s/)
end
|
37
47
|
end
|
38
48
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rethinkdb'
|
2
|
+
require 'wordtree/book'
|
3
|
+
|
4
|
+
module WordTree
  module DB
    # Stores and retrieves Book records in the RethinkDB table 'books'.
    class Librarian
      # rethinkdb_connection - the connection every query is run against.
      def initialize(rethinkdb_connection)
        @rdb = rethinkdb_connection
        @r = RethinkDB::RQL.new
      end

      # Looks up the row whose key is book_id.
      #
      # Returns a Book built from the row, or nil when the query returns nothing.
      def find(book_id)
        result = @r.table('books').get(book_id).run(@rdb)
        result ? Book.new(result) : nil
      end

      # Writes book.metadata to the table with :upsert => true.
      #
      # Returns true when the response reports exactly one row as replaced,
      # inserted or unchanged; false otherwise.
      def save(book)
        result = @r.table('books').insert(book.metadata, :upsert => true).run(@rdb)
        result["replaced"] == 1 || result["inserted"] == 1 || result["unchanged"] == 1
      end

      # Searches the table, ordered by the 'year' index, one page at a time.
      #
      # params   - Hash keyed by Symbol. :id, :title, :author, :source and
      #            :status are matched as case-insensitive patterns; :year and
      #            :size_bytes take either "value" or "low,high" (inclusive).
      # page     - 1-based page number.
      # per_page - number of rows per page.
      #
      # Returns an Array of Book, or nil when the page has no rows.
      # NOTE(review): order_by(:index => 'year') presumably requires a
      # secondary index named 'year' on the table - confirm it is created.
      def search(params, page=1, per_page=20)
        conditions = match_list(params,
          [:id, :title, :author, :source, :status],
          [:year, :size_bytes])
        cursor = @r.table('books').
          order_by(:index => 'year').
          filter(&conditions).
          skip((page-1)*per_page).
          limit(per_page).
          run(@rdb)
        results = cursor.to_a
        results.empty? ? nil : results.map { |result| Book.new(result) }
      end

      protected

      # Create a condition Proc suitable for RethinkDB search queries.
      #
      # params       - Hash of search values keyed by Symbol.
      # string_keys  - keys matched with record[key].match("(?i)term").
      # numeric_keys - keys compared numerically (values converted with to_i);
      #                a value containing ',' is read as "low,high".
      #                NOTE(review): assumes numeric values arrive as Strings.
      # escape       - when true, string terms go through Regexp.escape.
      #
      # The Proc receives a record and returns all applicable conditions
      # combined with &.
      def match_list(params, string_keys=[], numeric_keys=[], escape=true)
        Proc.new do |record|
          string_terms = string_keys.map do |key|
            if params[key]
              term = escape ? Regexp.escape(params[key]) : params[key]
              record[key.to_s].match("(?i)#{term}")
            end
          end

          numeric_terms = numeric_keys.map do |key|
            if params[key]
              if params[key].include?(',')
                low, high = params[key].split(',', 2).map{ |v| v.to_i }
                (record[key.to_s] >= low) & (record[key.to_s] <= high)
              else
                value = params[key].to_i
                record[key.to_s].eq(value)
              end
            end
          end

          terms = (string_terms + numeric_terms).compact
          if terms.empty?
            # NOTE(review): with no conditions the block yields true so that
            # nothing is filtered out - confirm the driver accepts a plain
            # boolean as a filter result.
            true
          else
            # Array has no #foldl; #reduce folds the terms left to right.
            terms.reduce { |a, b| a & b }
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'preamble'
|
2
|
+
require 'wordtree/book'
|
3
|
+
require 'wordtree/disk/library'
|
4
|
+
require 'wordtree/archdown'
|
5
|
+
|
6
|
+
module WordTree
  module Disk
    # Reads and writes Book objects as Preamble files inside a Library, and
    # downloads books from archive.org into that library.
    class Librarian
      attr_reader :library

      def initialize(library)
        @library = library
      end

      # Loads the file at library.path_to(book_id) and builds a Book from its
      # metadata and content. Returns nil when the file does not exist.
      def find(book_id)
        retrieved = Preamble.load(library.path_to(book_id))
        Book.create(book_id, retrieved.metadata, retrieved.content)
      rescue Errno::ENOENT
        nil
      end

      # Creates the book's directory, then writes its metadata and content
      # (an empty string when content is nil) to library.path_to(book.id).
      def save(book)
        library.mkdir(book.id)
        document = Preamble.new(book.metadata, book.content || "")
        document.save(library.path_to(book.id))
      end

      # Downloads each of the given archive.org identifiers in turn and
      # returns the combined list of saved book ids.
      def archive_org_get(*book_ids, &block)
        book_ids.flat_map do |book_id|
          archive_org_get_with_conditions(:identifier => book_id, &block)
        end
      end

      # Downloads using :start_year / :end_year conditions.
      def archive_org_get_range_of_years(start_year, end_year, &block)
        year_range = { :start_year => start_year, :end_year => end_year }
        archive_org_get_with_conditions(year_range, &block)
      end

      # Downloads a set of books to the on-disk library and
      # returns a list of book_ids.
      #
      # Each saved book is yielded together with this librarian when a block
      # is given. Failed downloads are reported on $stderr and skipped.
      def archive_org_get_with_conditions(conditions, &block)
        archive_org_ids = []
        Archdown.new.download_all(conditions) do |metadata, content, failure|
          if failure
            #TODO: logging
            $stderr.puts "Unable to download from archive.org: #{failure}"
            next
          end

          book = Book.create(metadata["archive_org_id"], metadata, content)
          save(book)
          yield book, self if block_given?
          archive_org_ids << book.id
        end
        archive_org_ids
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
require 'wordtree/archdown'
|
4
|
+
require 'wordtree/disk/library_locator'
|
5
|
+
|
6
|
+
module WordTree
  module Disk
    # Maps book IDs to directories and files beneath a library root on disk.
    class Library

      # File name template per file type; %s is replaced by the book ID.
      FILE_TYPES = {
        :raw => "%s.md"
      }

      # The file path to the root of the library directory, e.g. /data/library
      attr_reader :root

      def initialize(root)
        @root = root
      end

      # Returns the full path of a book's subdirectory within the library.
      # Accepts either a String or a LibraryLocator object.
      def dir_of(book_id)
        locator = LibraryLocator.identity(book_id)
        File.expand_path(locator.relpath, root)
      end

      # Returns the full path of the book's file of the given type.
      def path_to(book_id, type=:raw)
        directory = dir_of(book_id)
        File.join(directory, file_type(book_id, type))
      end

      # Returns the file name (no directory) for the book and file type.
      def file_type(book_id, type=:raw)
        book = LibraryLocator.identity(book_id)
        template = FILE_TYPES[type]
        template % book.id
      end

      # Create all subdirs up to the location where a book is stored.
      # Accepts either a String or a LibraryLocator object.
      def mkdir(book_id)
        target = dir_of(book_id)
        FileUtils.mkdir_p(target)
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module WordTree
  module Disk
    # A class that converts from a book ID to a location within the library, e.g.
    #
    #   "firstbooknapole00gruagoog"
    #
    # becomes
    #
    #   "fi/og/firstbooknapole00gruagoog/"
    #
    # or, in context of the full path:
    #
    #   [/data/library/] "fi/og/firstbooknapole00gruagoog/" [firstbooknapole00gruagoog.md]
    #
    class LibraryLocator
      # The book ID to locate
      attr_reader :id

      # Construct a LibraryLocator from a string (book ID)
      def initialize(id)
        @id = id
      end

      # The first two characters of the ID, lowercased. IDs shorter than two
      # characters yield whatever characters they have.
      def first
        @id[0..1].downcase
      end

      # The last two characters of the ID, lowercased.
      #
      # For an ID shorter than two characters the slice [-2..-1] is nil, which
      # used to raise NoMethodError; fall back to the whole ID so that #last
      # treats short IDs the same way #first does.
      def last
        (@id[-2..-1] || @id).downcase
      end

      # Returns a "relative" path to be joined to the library root,
      # e.g. if the identifier is "firstbooknapole00gruagoog", then relpath
      # returns "fi/og/firstbooknapole00gruagoog", which a Library expands to
      # something like "/data/library/fi/og/firstbooknapole00gruagoog".
      def relpath
        File.join(first, last, @id)
      end

      # Constructor that is as willing to use a String as it is a LibraryLocator:
      # an existing locator is returned as-is, anything else is wrapped.
      def self.identity(id)
        id.is_a?(LibraryLocator) ? id : new(id)
      end
    end
  end
end
|
data/lib/wordtree/version.rb
CHANGED
data/lib/wordtree.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require "wordtree/version"
|
2
2
|
require "wordtree/book"
|
3
3
|
|
4
|
-
require "wordtree/
|
5
|
-
|
6
|
-
require "wordtree/
|
4
|
+
require "wordtree/db/librarian"
|
5
|
+
|
6
|
+
require "wordtree/disk/library"
|
7
|
+
require "wordtree/disk/librarian"
|
8
|
+
require "wordtree/disk/library_locator"
|
data/spec/spec_helper.rb
CHANGED
data/spec/wordtree/book_spec.rb
CHANGED
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'preamble'
|
4
|
+
require 'wordtree/db/librarian'
|
5
|
+
|
6
|
+
# Specs for the RethinkDB-backed librarian. Everything inside the
# "with rethinkdb connection" context talks to the server described by
# RDB_CONFIG.
describe WordTree::DB::Librarian do
  it "instantiates" do
    WordTree::DB::Librarian.new("connection")
  end

  context "with rethinkdb connection" do
    let(:r) { RethinkDB::RQL.new }
    let(:rdb) do
      r.connect(
        :host => RDB_CONFIG[:host],
        :port => RDB_CONFIG[:port],
        :db => RDB_CONFIG[:db]
      )
    end

    let(:librarian) { WordTree::DB::Librarian.new(rdb) }

    # Give every example a freshly created 'books' table.
    before(:each) do
      begin
        r.table_drop('books').run(rdb)
      rescue RethinkDB::RqlRuntimeError
        # Ignored on purpose - presumably raised when the table does not exist yet.
      ensure
        r.table_create('books').run(rdb)
      end
    end

    describe "#find" do
      it "returns nil if book doesn't exist" do
        expect(librarian.find("abc")).to be_nil
      end

      it "finds a book" do
        row = { :id => "firstbooknapole00gruagoog", :year => 1809 }
        r.table('books').insert(row).run(rdb)
        book = librarian.find("firstbooknapole00gruagoog")
        expect(book).to be_a(WordTree::Book)
      end
    end

    describe "#save" do
      it "saves a book to the db" do
        book = WordTree::Book.create('test', {:year => 1800}, "body")
        librarian.save(book)
        stored_rows = r.table('books').run(rdb).to_a
        expected_row = {
          "id" => "test",
          "simhash" => 1318950168412674304,
          "size_bytes" => 4,
          "year" => 1800
        }
        expect(stored_rows).to eq([expected_row])
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'preamble'
|
4
|
+
require 'wordtree/disk/librarian'
|
5
|
+
|
6
|
+
# Specs for the on-disk librarian: downloading through a VCR cassette, and
# loading/saving against the fixture library.
describe WordTree::Disk::Librarian do
  let(:root) { Dir.mktmpdir }
  let(:library) { WordTree::Disk::Library.new(root) }
  let(:librarian) { WordTree::Disk::Librarian.new(library) }

  it "downloads an archive.org book" do
    VCR.use_cassette('archive_org_download_book') do
      librarian.archive_org_get("firstbooknapole00gruagoog")
      downloaded = librarian.find("firstbooknapole00gruagoog")
      expect(downloaded.year).to eq(1809)
    end
  end

  context "with fixture library" do
    # Need a read-only library with fixtures in it for some tests
    let(:root) { fixture("library") }

    describe "#find" do
      it "returns nil if the book is not found" do
        expect(librarian.find("nobook")).to be_nil
      end

      it "loads book from disk" do
        book = librarian.find("book")
        expect(book.id).to eq("book")
        expect(book.year).to eq(1800)
        expect(book.content).to eq("Book with content")
      end
    end

    it "saves to disk (yaml, content)" do
      # Save into a scratch library so the fixture library stays untouched.
      tmp_root = Dir.mktmpdir
      tmp_library = WordTree::Disk::Library.new(tmp_root)
      tmp_librarian = WordTree::Disk::Librarian.new(tmp_library)

      book = librarian.find("book")
      book.source = "test"
      book.content += "."
      tmp_librarian.save(book)

      updated = Preamble.load(tmp_library.path_to("book"))
      expected_metadata = {
        :id => "book",
        :year => 1800,
        :source => "test",
        :simhash => 14921967289891934128,
        :size_bytes => 17
      }
      expect(updated.metadata).to eq(expected_metadata)
      expect(updated.content).to eq("Book with content.")
    end

  end
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'wordtree/library'
|
2
|
+
require 'wordtree/disk/library'
|
3
3
|
require 'tmpdir'
|
4
4
|
|
5
|
-
describe WordTree::Library do
|
5
|
+
describe WordTree::Disk::Library do
|
6
6
|
let(:root) { Dir.mktmpdir('library') }
|
7
|
-
let(:library) { WordTree::Library.new(root) }
|
7
|
+
let(:library) { WordTree::Disk::Library.new(root) }
|
8
8
|
|
9
9
|
it "initializes with path" do
|
10
10
|
expect(library.root).to eq(root)
|
data/wordtree.gemspec
CHANGED
@@ -22,6 +22,8 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_dependency "preamble", ">= 0.0.3"
|
23
23
|
spec.add_dependency "archivist-client", ">= 0.1.7"
|
24
24
|
spec.add_dependency "retriable"
|
25
|
+
spec.add_dependency "simhash", "0.2.5"
|
26
|
+
spec.add_dependency "rethinkdb", "~> 1.13.0"
|
25
27
|
|
26
28
|
spec.add_development_dependency "bundler", "~> 1.3"
|
27
29
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordtree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: virtus
|
@@ -75,6 +75,38 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: simhash
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - '='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 0.2.5
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - '='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 0.2.5
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: rethinkdb
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.13.0
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.13.0
|
78
110
|
- !ruby/object:Gem::Dependency
|
79
111
|
name: bundler
|
80
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,17 +252,19 @@ files:
|
|
220
252
|
- lib/wordtree.rb
|
221
253
|
- lib/wordtree/archdown.rb
|
222
254
|
- lib/wordtree/book.rb
|
223
|
-
- lib/wordtree/librarian.rb
|
224
|
-
- lib/wordtree/
|
225
|
-
- lib/wordtree/
|
255
|
+
- lib/wordtree/db/librarian.rb
|
256
|
+
- lib/wordtree/disk/librarian.rb
|
257
|
+
- lib/wordtree/disk/library.rb
|
258
|
+
- lib/wordtree/disk/library_locator.rb
|
226
259
|
- lib/wordtree/text_utils.rb
|
227
260
|
- lib/wordtree/version.rb
|
228
261
|
- spec/fixtures/cassettes/archive_org_download_book.yml
|
229
262
|
- spec/fixtures/library/bo/ok/book/book.md
|
230
263
|
- spec/spec_helper.rb
|
231
264
|
- spec/wordtree/book_spec.rb
|
232
|
-
- spec/wordtree/librarian_spec.rb
|
233
|
-
- spec/wordtree/
|
265
|
+
- spec/wordtree/db/librarian_spec.rb
|
266
|
+
- spec/wordtree/disk/librarian_spec.rb
|
267
|
+
- spec/wordtree/disk/library_spec.rb
|
234
268
|
- spec/wordtree/text_utils_spec.rb
|
235
269
|
- wordtree.gemspec
|
236
270
|
homepage: ''
|
@@ -263,7 +297,8 @@ test_files:
|
|
263
297
|
- spec/fixtures/library/bo/ok/book/book.md
|
264
298
|
- spec/spec_helper.rb
|
265
299
|
- spec/wordtree/book_spec.rb
|
266
|
-
- spec/wordtree/librarian_spec.rb
|
267
|
-
- spec/wordtree/
|
300
|
+
- spec/wordtree/db/librarian_spec.rb
|
301
|
+
- spec/wordtree/disk/librarian_spec.rb
|
302
|
+
- spec/wordtree/disk/library_spec.rb
|
268
303
|
- spec/wordtree/text_utils_spec.rb
|
269
304
|
has_rdoc:
|
data/lib/wordtree/librarian.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
require 'preamble'
|
2
|
-
require 'wordtree/book'
|
3
|
-
require 'wordtree/library'
|
4
|
-
require 'wordtree/archdown'
|
5
|
-
|
6
|
-
module WordTree
|
7
|
-
class Librarian
|
8
|
-
attr_reader :library
|
9
|
-
|
10
|
-
def initialize(library)
|
11
|
-
@library = library
|
12
|
-
end
|
13
|
-
|
14
|
-
def find(book_id)
|
15
|
-
retrieved = Preamble.load(library.path_to(book_id))
|
16
|
-
Book.create(book_id, retrieved.metadata, retrieved.content)
|
17
|
-
end
|
18
|
-
|
19
|
-
def save(book)
|
20
|
-
library.mkdir(book.id)
|
21
|
-
Preamble.new(book.metadata, book.content).save(library.path_to(book.id))
|
22
|
-
end
|
23
|
-
|
24
|
-
def archive_org_get(*book_ids, &block)
|
25
|
-
book_ids.map do |book_id|
|
26
|
-
archive_org_get_with_conditions(identifier: book_id, &block)
|
27
|
-
end.flatten(1)
|
28
|
-
end
|
29
|
-
|
30
|
-
def archive_org_get_range_of_years(start_year, end_year, &block)
|
31
|
-
archive_org_get_with_conditions({
|
32
|
-
:start_year => start_year,
|
33
|
-
:end_year => end_year
|
34
|
-
}, &block)
|
35
|
-
end
|
36
|
-
|
37
|
-
# Downloads a set of books to the on-disk library and
|
38
|
-
# returns a list of book_ids
|
39
|
-
def archive_org_get_with_conditions(conditions, &block)
|
40
|
-
archdown = Archdown.new
|
41
|
-
[].tap do |archive_org_ids|
|
42
|
-
archdown.download_all(conditions) do |metadata, content, failure|
|
43
|
-
if failure
|
44
|
-
#TODO: logging
|
45
|
-
$stderr.puts "Unable to download from archive.org: #{failure}"
|
46
|
-
else
|
47
|
-
book = Book.create(metadata["archive_org_id"], metadata, content)
|
48
|
-
save(book)
|
49
|
-
yield book, self if block_given?
|
50
|
-
archive_org_ids << book.id
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
end
|
data/lib/wordtree/library.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
|
3
|
-
require 'wordtree/archdown'
|
4
|
-
require 'wordtree/library_locator'
|
5
|
-
|
6
|
-
module WordTree
|
7
|
-
class Library
|
8
|
-
|
9
|
-
FILE_TYPES = {
|
10
|
-
:raw => "%s.md"
|
11
|
-
}
|
12
|
-
|
13
|
-
# The file path to the root of the library directory, e.g. /data/library
|
14
|
-
attr_reader :root
|
15
|
-
|
16
|
-
def initialize(root)
|
17
|
-
@root = root
|
18
|
-
end
|
19
|
-
|
20
|
-
# returns the full path of a book's subdirectory within the library
|
21
|
-
# Accepts either a String or a LibraryLocator object
|
22
|
-
def dir_of(book_id)
|
23
|
-
File.expand_path(LibraryLocator.identity(book_id).relpath, root)
|
24
|
-
end
|
25
|
-
|
26
|
-
def path_to(book_id, type=:raw)
|
27
|
-
File.join(dir_of(book_id), file_type(book_id, type))
|
28
|
-
end
|
29
|
-
|
30
|
-
def file_type(book_id, type=:raw)
|
31
|
-
locator = LibraryLocator.identity(book_id)
|
32
|
-
FILE_TYPES[type] % locator.id
|
33
|
-
end
|
34
|
-
|
35
|
-
# Create all subdirs up to the location where a book is stored
|
36
|
-
# Accepts either a String or a LibraryLocator object
|
37
|
-
def mkdir(book_id)
|
38
|
-
FileUtils.mkdir_p(dir_of(book_id))
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
module WordTree
|
2
|
-
# A class that converts from a book ID to a location within the library, e.g.
|
3
|
-
#
|
4
|
-
# "firstbooknapole00gruagoog"
|
5
|
-
#
|
6
|
-
# becomes
|
7
|
-
#
|
8
|
-
# "fi/og/firstbooknapole00gruagoog/"
|
9
|
-
#
|
10
|
-
# or, in context of the full path:
|
11
|
-
#
|
12
|
-
# [/data/library/] "fi/og/firstbooknapole00gruagoog/" [firstbooknapole00gruagoog.md]
|
13
|
-
#
|
14
|
-
class LibraryLocator
|
15
|
-
# The book ID to locate
|
16
|
-
attr_reader :id
|
17
|
-
|
18
|
-
# Construct a LibraryLocator from a string (book ID)
|
19
|
-
def initialize(id)
|
20
|
-
@id = id
|
21
|
-
end
|
22
|
-
|
23
|
-
def first
|
24
|
-
@id[0..1].downcase
|
25
|
-
end
|
26
|
-
|
27
|
-
def last
|
28
|
-
@id[-2..-1].downcase
|
29
|
-
end
|
30
|
-
|
31
|
-
# Returns a "relative" path to be joined to the library root,
|
32
|
-
# e.g. if the identifier is "firstbooknapole00gruagoog", then relpath
|
33
|
-
# should return "fi/og/firstbooknapole00gruagoog", i.e. probably later to
|
34
|
-
# become something like "/data/library/fi/og/firstbooknapole00gruagoog"
|
35
|
-
def relpath
|
36
|
-
File.join(first, last, @id)
|
37
|
-
end
|
38
|
-
|
39
|
-
# Constructor that is as willing to use a String as it is a LibraryLocator
|
40
|
-
def self.identity(id)
|
41
|
-
id.is_a?(LibraryLocator) ? id : new(id)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
require 'tmpdir'
|
3
|
-
require 'preamble'
|
4
|
-
require 'wordtree/librarian'
|
5
|
-
|
6
|
-
describe WordTree::Librarian do
|
7
|
-
let(:root) { Dir.mktmpdir }
|
8
|
-
let(:library) { WordTree::Library.new(root) }
|
9
|
-
let(:librarian) { WordTree::Librarian.new(library) }
|
10
|
-
|
11
|
-
it "downloads an archive.org book" do
|
12
|
-
VCR.use_cassette('archive_org_download_book') do
|
13
|
-
librarian.archive_org_get("firstbooknapole00gruagoog")
|
14
|
-
book = librarian.find("firstbooknapole00gruagoog")
|
15
|
-
expect(book.year).to eq(1809)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context "with fixture library" do
|
20
|
-
# Need a read-only library with fixtures in it for some tests
|
21
|
-
let(:root) { fixture("library") }
|
22
|
-
|
23
|
-
it "loads book from disk" do
|
24
|
-
book = librarian.find("book")
|
25
|
-
expect(book.id).to eq("book")
|
26
|
-
expect(book.year).to eq(1800)
|
27
|
-
expect(book.content).to eq("Book with content")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "saves to disk (yaml, content)" do
|
31
|
-
tmp_root = Dir.mktmpdir
|
32
|
-
tmp_library = WordTree::Library.new(tmp_root)
|
33
|
-
tmp_librarian = WordTree::Librarian.new(tmp_library)
|
34
|
-
|
35
|
-
book = librarian.find("book")
|
36
|
-
|
37
|
-
book.source = "test"
|
38
|
-
book.content += "."
|
39
|
-
|
40
|
-
tmp_librarian.save(book)
|
41
|
-
|
42
|
-
updated = Preamble.load(tmp_library.path_to("book"))
|
43
|
-
expect(updated.metadata).to eq({:year => 1800, :source => "test"})
|
44
|
-
expect(updated.content).to eq("Book with content.")
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
end
|