wordtree 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -3
- data/lib/wordtree/librarian.rb +6 -6
- data/lib/wordtree/version.rb +1 -1
- data/lib/wordtree.rb +4 -3
- data/spec/fixtures/cassettes/archive_org_download_book.yml +79 -0
- data/spec/wordtree/librarian_spec.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -13,6 +13,12 @@ This is the WordTree ruby gem for text analysis.
|
|
13
13
|
library = WordTree::Library.new("/tmp/library")
|
14
14
|
librarian = WordTree::Librarian.new(library)
|
15
15
|
|
16
|
+
Download a book from Archive.org to your "library":
|
17
|
+
|
18
|
+
book_ids = librarian.archive_org_get(
|
19
|
+
'latewarbetween_00hunt',
|
20
|
+
'firstbooknapole00gruagoog')
|
21
|
+
|
16
22
|
Find a book in your on-disk "library":
|
17
23
|
|
18
24
|
book = librarian.find('firstbooknapole00gruagoog')
|
@@ -24,9 +30,6 @@ Modify and save a book to your "library":
|
|
24
30
|
book.year = 2014
|
25
31
|
librarian.save(book)
|
26
32
|
|
27
|
-
Download a book from Archive.org to your "library":
|
28
|
-
|
29
|
-
book_id = librarian.archive_org_get('latewarbetween_00hunt')
|
30
33
|
|
31
34
|
|
32
35
|
## Contributing
|
data/lib/wordtree/librarian.rb
CHANGED
@@ -21,14 +21,14 @@ module WordTree
|
|
21
21
|
Preamble.new(book.metadata, book.content).save(library.path_to(book.id))
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
|
26
|
-
:
|
27
|
-
|
24
|
+
def archive_org_get(*book_ids, &block)
|
25
|
+
book_ids.map do |book_id|
|
26
|
+
archive_org_get_with_conditions(identifier: book_id, &block)
|
27
|
+
end.flatten(1)
|
28
28
|
end
|
29
29
|
|
30
30
|
def archive_org_get_range_of_years(start_year, end_year, &block)
|
31
|
-
|
31
|
+
archive_org_get_with_conditions({
|
32
32
|
:start_year => start_year,
|
33
33
|
:end_year => end_year
|
34
34
|
}, &block)
|
@@ -36,7 +36,7 @@ module WordTree
|
|
36
36
|
|
37
37
|
# Downloads a set of books to the on-disk library and
|
38
38
|
# returns a list of book_ids
|
39
|
-
def
|
39
|
+
def archive_org_get_with_conditions(conditions, &block)
|
40
40
|
archdown = Archdown.new
|
41
41
|
[].tap do |archive_org_ids|
|
42
42
|
archdown.download_all(conditions) do |metadata, content, failure|
|
data/lib/wordtree/version.rb
CHANGED
data/lib/wordtree.rb
CHANGED
data/spec/fixtures/cassettes/archive_org_download_book.yml
CHANGED
@@ -1624,4 +1624,83 @@ http_interactions:
|
|
1624
1624
|
)","wt":"json","rows":"50"}},"response":{"numFound":1,"start":50,"docs":[]}}'
|
1625
1625
|
http_version:
|
1626
1626
|
recorded_at: Thu, 17 Jul 2014 01:35:20 GMT
|
1627
|
+
- request:
|
1628
|
+
method: get
|
1629
|
+
uri: http://archive.org/advancedsearch.php?fl%5B0%5D=identifier&fl%5B1%5D=title&fl%5B2%5D=creator&fl%5B3%5D=date&fl%5B4%5D=language&fl%5B5%5D=mediattype&output=json&page=1&q=mediatype:texts%20AND%20-mediatype:collection%20AND%20(language:eng%20OR%20language:English)%20AND%20identifier:firstbooknapole00gruagoog&rows=50&sort%5B0%5D=date%20asc
|
1630
|
+
body:
|
1631
|
+
encoding: US-ASCII
|
1632
|
+
string: ''
|
1633
|
+
headers:
|
1634
|
+
User-Agent:
|
1635
|
+
- Faraday v0.9.0
|
1636
|
+
Accept-Encoding:
|
1637
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
1638
|
+
Accept:
|
1639
|
+
- ! '*/*'
|
1640
|
+
response:
|
1641
|
+
status:
|
1642
|
+
code: 200
|
1643
|
+
message: OK
|
1644
|
+
headers:
|
1645
|
+
Server:
|
1646
|
+
- nginx/1.1.19
|
1647
|
+
Date:
|
1648
|
+
- Thu, 17 Jul 2014 04:29:38 GMT
|
1649
|
+
Content-Type:
|
1650
|
+
- application/json
|
1651
|
+
Transfer-Encoding:
|
1652
|
+
- chunked
|
1653
|
+
Connection:
|
1654
|
+
- keep-alive
|
1655
|
+
X-Powered-By:
|
1656
|
+
- PHP/5.3.10-1ubuntu3.2
|
1657
|
+
body:
|
1658
|
+
encoding: US-ASCII
|
1659
|
+
string: ! '{"responseHeader":{"status":0,"QTime":43,"params":{"json.wrf":"","qin":"mediatype:texts
|
1660
|
+
AND -mediatype:collection AND (language:eng OR language:English) AND identifier:firstbooknapole00gruagoog","fl":"identifier,title,creator,date,language,mediattype","sort":"date
|
1661
|
+
asc","indent":"","start":"0","q":"mediatype:texts AND -mediatype:collection
|
1662
|
+
AND ( language:eng OR language:English ) AND identifier:firstbooknapole00gruagoog","wt":"json","rows":"50"}},"response":{"numFound":1,"start":0,"docs":[{"title":"The
|
1663
|
+
First Book of Napoleon, the Tyrant of the Earth: Written in the 5813th Year
|
1664
|
+
of the World ...","date":"1809-01-01T00:00:00Z","identifier":"firstbooknapole00gruagoog","language":["English"],"creator":["Modeste
|
1665
|
+
Gruau"]}]}}'
|
1666
|
+
http_version:
|
1667
|
+
recorded_at: Thu, 17 Jul 2014 04:29:37 GMT
|
1668
|
+
- request:
|
1669
|
+
method: get
|
1670
|
+
uri: http://archive.org/advancedsearch.php?fl%5B0%5D=identifier&fl%5B1%5D=title&fl%5B2%5D=creator&fl%5B3%5D=date&fl%5B4%5D=language&fl%5B5%5D=mediattype&output=json&page=2&q=mediatype:texts%20AND%20-mediatype:collection%20AND%20(language:eng%20OR%20language:English)%20AND%20identifier:firstbooknapole00gruagoog&rows=50&sort%5B0%5D=date%20asc
|
1671
|
+
body:
|
1672
|
+
encoding: US-ASCII
|
1673
|
+
string: ''
|
1674
|
+
headers:
|
1675
|
+
User-Agent:
|
1676
|
+
- Faraday v0.9.0
|
1677
|
+
Accept-Encoding:
|
1678
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
1679
|
+
Accept:
|
1680
|
+
- ! '*/*'
|
1681
|
+
response:
|
1682
|
+
status:
|
1683
|
+
code: 200
|
1684
|
+
message: OK
|
1685
|
+
headers:
|
1686
|
+
Server:
|
1687
|
+
- nginx/1.1.19
|
1688
|
+
Date:
|
1689
|
+
- Thu, 17 Jul 2014 04:29:39 GMT
|
1690
|
+
Content-Type:
|
1691
|
+
- application/json
|
1692
|
+
Transfer-Encoding:
|
1693
|
+
- chunked
|
1694
|
+
Connection:
|
1695
|
+
- keep-alive
|
1696
|
+
X-Powered-By:
|
1697
|
+
- PHP/5.3.10-1ubuntu3.2
|
1698
|
+
body:
|
1699
|
+
encoding: US-ASCII
|
1700
|
+
string: ! '{"responseHeader":{"status":0,"QTime":0,"params":{"json.wrf":"","qin":"mediatype:texts
|
1701
|
+
AND -mediatype:collection AND (language:eng OR language:English) AND identifier:firstbooknapole00gruagoog","fl":"identifier,title,creator,date,language,mediattype","sort":"date
|
1702
|
+
asc","indent":"","start":"50","q":"mediatype:texts AND -mediatype:collection
|
1703
|
+
AND ( language:eng OR language:English ) AND identifier:firstbooknapole00gruagoog","wt":"json","rows":"50"}},"response":{"numFound":1,"start":50,"docs":[]}}'
|
1704
|
+
http_version:
|
1705
|
+
recorded_at: Thu, 17 Jul 2014 04:29:38 GMT
|
1627
1706
|
recorded_with: VCR 2.9.2
|
data/spec/wordtree/librarian_spec.rb
CHANGED
@@ -10,7 +10,7 @@ describe WordTree::Librarian do
|
|
10
10
|
|
11
11
|
it "downloads an archive.org book" do
|
12
12
|
VCR.use_cassette('archive_org_download_book') do
|
13
|
-
librarian.
|
13
|
+
librarian.archive_org_get("firstbooknapole00gruagoog")
|
14
14
|
book = librarian.find("firstbooknapole00gruagoog")
|
15
15
|
expect(book.year).to eq(1809)
|
16
16
|
end
|