wordtree 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -3
- data/lib/wordtree/librarian.rb +6 -6
- data/lib/wordtree/version.rb +1 -1
- data/lib/wordtree.rb +4 -3
- data/spec/fixtures/cassettes/archive_org_download_book.yml +79 -0
- data/spec/wordtree/librarian_spec.rb +1 -1
- metadata +1 -1
data/README.md
CHANGED
@@ -13,6 +13,12 @@ This is the WordTree ruby gem for text analysis.
|
|
13
13
|
library = WordTree::Library.new("/tmp/library")
|
14
14
|
librarian = WordTree::Librarian.new(library)
|
15
15
|
|
16
|
+
Download a book from Archive.org to your "library":
|
17
|
+
|
18
|
+
book_ids = librarian.archive_org_get(
|
19
|
+
'latewarbetween_00hunt',
|
20
|
+
'firstbooknapole00gruagoog')
|
21
|
+
|
16
22
|
Find a book in your on-disk "library":
|
17
23
|
|
18
24
|
book = librarian.find('firstbooknapole00gruagoog')
|
@@ -24,9 +30,6 @@ Modify and save a book to your "library":
|
|
24
30
|
book.year = 2014
|
25
31
|
librarian.save(book)
|
26
32
|
|
27
|
-
Download a book from Archive.org to your "library":
|
28
|
-
|
29
|
-
book_id = librarian.archive_org_get('latewarbetween_00hunt')
|
30
33
|
|
31
34
|
|
32
35
|
## Contributing
|
data/lib/wordtree/librarian.rb
CHANGED
@@ -21,14 +21,14 @@ module WordTree
|
|
21
21
|
Preamble.new(book.metadata, book.content).save(library.path_to(book.id))
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
|
26
|
-
:
|
27
|
-
|
24
|
+
def archive_org_get(*book_ids, &block)
|
25
|
+
book_ids.map do |book_id|
|
26
|
+
archive_org_get_with_conditions(identifier: book_id, &block)
|
27
|
+
end.flatten(1)
|
28
28
|
end
|
29
29
|
|
30
30
|
def archive_org_get_range_of_years(start_year, end_year, &block)
|
31
|
-
|
31
|
+
archive_org_get_with_conditions({
|
32
32
|
:start_year => start_year,
|
33
33
|
:end_year => end_year
|
34
34
|
}, &block)
|
@@ -36,7 +36,7 @@ module WordTree
|
|
36
36
|
|
37
37
|
# Downloads a set of books to the on-disk library and
|
38
38
|
# returns a list of book_ids
|
39
|
-
def
|
39
|
+
def archive_org_get_with_conditions(conditions, &block)
|
40
40
|
archdown = Archdown.new
|
41
41
|
[].tap do |archive_org_ids|
|
42
42
|
archdown.download_all(conditions) do |metadata, content, failure|
|
data/lib/wordtree/version.rb
CHANGED
data/lib/wordtree.rb
CHANGED
@@ -1624,4 +1624,83 @@ http_interactions:
|
|
1624
1624
|
)","wt":"json","rows":"50"}},"response":{"numFound":1,"start":50,"docs":[]}}'
|
1625
1625
|
http_version:
|
1626
1626
|
recorded_at: Thu, 17 Jul 2014 01:35:20 GMT
|
1627
|
+
- request:
|
1628
|
+
method: get
|
1629
|
+
uri: http://archive.org/advancedsearch.php?fl%5B0%5D=identifier&fl%5B1%5D=title&fl%5B2%5D=creator&fl%5B3%5D=date&fl%5B4%5D=language&fl%5B5%5D=mediattype&output=json&page=1&q=mediatype:texts%20AND%20-mediatype:collection%20AND%20(language:eng%20OR%20language:English)%20AND%20identifier:firstbooknapole00gruagoog&rows=50&sort%5B0%5D=date%20asc
|
1630
|
+
body:
|
1631
|
+
encoding: US-ASCII
|
1632
|
+
string: ''
|
1633
|
+
headers:
|
1634
|
+
User-Agent:
|
1635
|
+
- Faraday v0.9.0
|
1636
|
+
Accept-Encoding:
|
1637
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
1638
|
+
Accept:
|
1639
|
+
- ! '*/*'
|
1640
|
+
response:
|
1641
|
+
status:
|
1642
|
+
code: 200
|
1643
|
+
message: OK
|
1644
|
+
headers:
|
1645
|
+
Server:
|
1646
|
+
- nginx/1.1.19
|
1647
|
+
Date:
|
1648
|
+
- Thu, 17 Jul 2014 04:29:38 GMT
|
1649
|
+
Content-Type:
|
1650
|
+
- application/json
|
1651
|
+
Transfer-Encoding:
|
1652
|
+
- chunked
|
1653
|
+
Connection:
|
1654
|
+
- keep-alive
|
1655
|
+
X-Powered-By:
|
1656
|
+
- PHP/5.3.10-1ubuntu3.2
|
1657
|
+
body:
|
1658
|
+
encoding: US-ASCII
|
1659
|
+
string: ! '{"responseHeader":{"status":0,"QTime":43,"params":{"json.wrf":"","qin":"mediatype:texts
|
1660
|
+
AND -mediatype:collection AND (language:eng OR language:English) AND identifier:firstbooknapole00gruagoog","fl":"identifier,title,creator,date,language,mediattype","sort":"date
|
1661
|
+
asc","indent":"","start":"0","q":"mediatype:texts AND -mediatype:collection
|
1662
|
+
AND ( language:eng OR language:English ) AND identifier:firstbooknapole00gruagoog","wt":"json","rows":"50"}},"response":{"numFound":1,"start":0,"docs":[{"title":"The
|
1663
|
+
First Book of Napoleon, the Tyrant of the Earth: Written in the 5813th Year
|
1664
|
+
of the World ...","date":"1809-01-01T00:00:00Z","identifier":"firstbooknapole00gruagoog","language":["English"],"creator":["Modeste
|
1665
|
+
Gruau"]}]}}'
|
1666
|
+
http_version:
|
1667
|
+
recorded_at: Thu, 17 Jul 2014 04:29:37 GMT
|
1668
|
+
- request:
|
1669
|
+
method: get
|
1670
|
+
uri: http://archive.org/advancedsearch.php?fl%5B0%5D=identifier&fl%5B1%5D=title&fl%5B2%5D=creator&fl%5B3%5D=date&fl%5B4%5D=language&fl%5B5%5D=mediattype&output=json&page=2&q=mediatype:texts%20AND%20-mediatype:collection%20AND%20(language:eng%20OR%20language:English)%20AND%20identifier:firstbooknapole00gruagoog&rows=50&sort%5B0%5D=date%20asc
|
1671
|
+
body:
|
1672
|
+
encoding: US-ASCII
|
1673
|
+
string: ''
|
1674
|
+
headers:
|
1675
|
+
User-Agent:
|
1676
|
+
- Faraday v0.9.0
|
1677
|
+
Accept-Encoding:
|
1678
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
1679
|
+
Accept:
|
1680
|
+
- ! '*/*'
|
1681
|
+
response:
|
1682
|
+
status:
|
1683
|
+
code: 200
|
1684
|
+
message: OK
|
1685
|
+
headers:
|
1686
|
+
Server:
|
1687
|
+
- nginx/1.1.19
|
1688
|
+
Date:
|
1689
|
+
- Thu, 17 Jul 2014 04:29:39 GMT
|
1690
|
+
Content-Type:
|
1691
|
+
- application/json
|
1692
|
+
Transfer-Encoding:
|
1693
|
+
- chunked
|
1694
|
+
Connection:
|
1695
|
+
- keep-alive
|
1696
|
+
X-Powered-By:
|
1697
|
+
- PHP/5.3.10-1ubuntu3.2
|
1698
|
+
body:
|
1699
|
+
encoding: US-ASCII
|
1700
|
+
string: ! '{"responseHeader":{"status":0,"QTime":0,"params":{"json.wrf":"","qin":"mediatype:texts
|
1701
|
+
AND -mediatype:collection AND (language:eng OR language:English) AND identifier:firstbooknapole00gruagoog","fl":"identifier,title,creator,date,language,mediattype","sort":"date
|
1702
|
+
asc","indent":"","start":"50","q":"mediatype:texts AND -mediatype:collection
|
1703
|
+
AND ( language:eng OR language:English ) AND identifier:firstbooknapole00gruagoog","wt":"json","rows":"50"}},"response":{"numFound":1,"start":50,"docs":[]}}'
|
1704
|
+
http_version:
|
1705
|
+
recorded_at: Thu, 17 Jul 2014 04:29:38 GMT
|
1627
1706
|
recorded_with: VCR 2.9.2
|
@@ -10,7 +10,7 @@ describe WordTree::Librarian do
|
|
10
10
|
|
11
11
|
it "downloads an archive.org book" do
|
12
12
|
VCR.use_cassette('archive_org_download_book') do
|
13
|
-
librarian.
|
13
|
+
librarian.archive_org_get("firstbooknapole00gruagoog")
|
14
14
|
book = librarian.find("firstbooknapole00gruagoog")
|
15
15
|
expect(book.year).to eq(1809)
|
16
16
|
end
|