philologic-client 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/HISTORY.rdoc +8 -0
  2. data/PhiloLogicResponseDocumentation.txt +86 -0
  3. data/README.rdoc +54 -32
  4. data/Rakefile +26 -0
  5. data/lib/philologic-client/bibliography.rb +60 -0
  6. data/lib/philologic-client/collocation.rb +61 -0
  7. data/lib/philologic-client/concordance.rb +39 -0
  8. data/lib/philologic-client/document.rb +65 -0
  9. data/lib/philologic-client/frequency.rb +57 -0
  10. data/lib/philologic-client/frequency_row.rb +67 -0
  11. data/lib/philologic-client/link.rb +37 -0
  12. data/lib/philologic-client/occurrence.rb +79 -0
  13. data/lib/philologic-client/response.rb +63 -0
  14. data/lib/philologic-client/version.rb +1 -1
  15. data/lib/philologic-client.rb +102 -286
  16. data/test/data/bibliography.html +182 -0
  17. data/test/data/collocation.html +2594 -0
  18. data/test/data/concordance.html +758 -0
  19. data/test/data/frequency.html +73 -0
  20. data/test/data/navigation.html +69 -0
  21. data/test/data/object.html +20 -0
  22. data/test/test_bibliography.rb +78 -0
  23. data/test/test_client.rb +861 -0
  24. data/test/test_collocation.rb +76 -0
  25. data/test/test_concordance.rb +83 -0
  26. data/test/test_document.rb +127 -0
  27. data/test/test_frequency.rb +78 -0
  28. data/test/test_occurrence.rb +66 -0
  29. data/test/test_response.rb +41 -0
  30. metadata +55 -36
  31. data/doc/PhiloLogicResponseTemplates.txt +0 -46
  32. data/test/data/collocation_links.html +0 -145
  33. data/test/data/collocation_sartre.html +0 -67
  34. data/test/data/doc_file.html +0 -396
  35. data/test/data/frequency_links.html +0 -145
  36. data/test/data/frequency_sartre.html +0 -67
  37. data/test/data/query_sartre.html +0 -151
  38. data/test/data/root_file.html +0 -1851
  39. data/test/test_philologic_client.rb +0 -558
  40. data/test/test_philologic_link.rb +0 -101
data/HISTORY.rdoc CHANGED
@@ -1,5 +1,13 @@
1
1
  = Philologic::Client History
2
2
 
3
+ == 2012-03-30 v0.0.13
4
+
5
+ * Supports all of the Philologic v4 specification except collocation tables
6
+
7
+ == 2012-02-13 v0.0.12
8
+
9
+ * Revert to parsing with +Nokogiri::HTML+, not +Nokogiri::XML+
10
+
3
11
  == 2012-02-09 v0.0.11
4
12
 
5
13
  * Added +Philologic::Link#text()+
@@ -0,0 +1,86 @@
1
+ ---Concordance Report: returned by fulltext searches---
2
+
3
+ <div class='philologic_response'>
4
+ <ol class='philologic_concordance'>
5
+ <li class='philologic_occurrence'>
6
+ <span class='hit_n'> {hit_number} </span>
7
+ <a href='{link_to_cited_object}' class='philologic_cite'>
8
+ <span class='philologic_property' title="{key}">{value}</span>
9
+ {more properties}
10
+ </a>
11
+ {possibly multiple links with their own properties--page images, etc.}
12
+ <div class='philologic_context'>{left context}
13
+ <span class='hilite'>{search term}</span>
14
+ …{possibly multiple separate hilit terms}…{right context}
15
+ </div>
16
+ </li>
17
+ {probably many occurrences}
18
+ </ol>
19
+ </div>
20
+
21
+
22
+ ---Bibliography Report: returned by metadata-only---
23
+
24
+ <div class='philologic_response'>
25
+ <ol class='philologic_cite_list'>
26
+ <li class='philologic_occurrence'>
27
+ <span class='hit_n'> {hit_number} </span>
28
+ <div class='philologic_cite'>
29
+ <a href='{link_to_cited_object}'>
30
+ <span class='philologic_property' title="{key}">{value}</span>
31
+ {more properties}
32
+ </a>
33
+ {possibly multiple links with their own properties--page images, etc.}
34
+ </div>
35
+ </li>
36
+ {probably many occurrences}
37
+ </ol>
38
+ </div>
39
+
40
+
41
+ ---Frequency Table: returned by frequency and collocation reports---
42
+ <div class='philologic_response'>
43
+ <table class='philologic_table'>
44
+ <tr class='freq_header_row'>
45
+ <th class='freq_checkbox_header'></th>
46
+ <th class='freq_label_header'> {facet_widget} </th>
47
+ <th class='freq_value_header'> {count} </th>
48
+ </tr>
49
+ <tr class='freq_row'>
50
+ <td class='freq_checkbox'><input type='checkbox'></input></td>
51
+ <td class='freq_label'> <a href='{link to concordance}'> {label} </a></td>
52
+ <td class='freq_value'> {count} </td>
53
+ </tr>
54
+ {probably many rows}
55
+ </table>
56
+ </div>
57
+
58
+
59
+ ---Object Viewer: returned by div and paragraph object access---
60
+
61
+ <div class='philologic_response'>
62
+ <div class='philologic_cite'>
63
+ <span class='philologic_property' title="{key}">{value}</span>
64
+ {more properties}
65
+ </div>
66
+ <div class='philologic_object'>
67
+ {various content}
68
+ <span class='hilite'>{hilit word}</span>
69
+ {possibly multiple hilit words}
70
+ </div>
71
+ </div>
72
+
73
+
74
+ ---Object Navigation: returned by document objects, links to children---
75
+
76
+ <div class='philologic_response'>
77
+ <div class='philologic_cite'>
78
+ <span class='philologic_property' title="{key}">{value}</span>
79
+ {more properties}
80
+ </div>
81
+ <div class='philologic_navigation'>
82
+ <a href='zzz'>[link to child object]</a>
83
+ [links to all child objects]
84
+ </div>
85
+ </div>
86
+
data/README.rdoc CHANGED
@@ -6,50 +6,74 @@
6
6
 
7
7
  Philologic::Client.new(endpoint) do |client|
8
8
 
9
- # Get children of root
10
- client.children.each { |child| puts child }
9
+ # Get/Set encoding
10
+ encoding = client.encoding
11
+ client.encoding = 'utf-8' # Default
11
12
 
12
- # Get document
13
- first = client.children.first
14
- doc = first.document
15
- html = doc.html
16
- txt = doc.text
13
+ # Get/Set endpoint
14
+ endpoint = client.endpoint
15
+ client.endpoint = 'http://philologic.example.org'
17
16
 
17
+ # Get bibliography
18
+ # +biblio+ is a Philologic::Client::Bibliography object
19
+ biblio = client.bibliography
20
+
21
+ # Get reference to first title
22
+ # +first+ is a Philologic::Client::Occurrence object
23
+ first = biblio.titles.first
18
24
 
19
- # Simple (concordance) search
20
- client.search('sartre').each { |result| puts result }
25
+ # Get first title
26
+ # +doc+ will be a Philologic::Client::Document object
27
+ doc = client.document( first['href'] )
21
28
 
22
- # Search by arbitrary key-value pairs
23
- client.search( :query => 'sartre', :foo => 1 ).each { |result| puts result }
29
+ # Get text if present
30
+ if doc.text?
31
+ txt = doc.text # Document text
32
+ html = doc.html # Document HTML
33
+ end
24
34
 
35
+ # Get links if present
36
+ if doc.links?
37
+ doc.links.each do |link|
38
+ # +link+ is Philologic::Client::Link object
39
+ link.url # Link URL
40
+ link.text # Link text
41
+ end
42
+ end
25
43
 
26
- # Simple collocation table search.
27
- client.collocation('sartre').each { |result| puts result }
44
+ # Get +Array+ of document property keys.
45
+ doc.keys
28
46
 
29
- # Collocation table search by arbitrary key-value pairs
30
- client.collocation( :query => 'sartre', :foo => 1 ).each { |result| puts result }
47
+ # Get document properties
48
+ doc.each { |p| puts "%s\t%s" % [ p, doc[p] ] }
31
49
 
32
- # Links for first result in collocation table search
33
- client.collocation('sartre').first.links.each { |link| puts link }
34
50
 
51
+ # Concordance search
52
+ # Returns Philologic::Client::Concordance object
53
+ q = client.concordance('lion')
35
54
 
36
- # Simple frequency table search.
37
- client.frequency('sartre').each { |result| puts result }
55
+ # Get results if present.
56
+ if q.results?
57
+ q.results.each do |result|
58
+ # +result+ is a Philologic::Client::Occurrence object
59
+ end
60
+ end
38
61
 
39
- # Frequency table search by arbitrary key-value pairs
40
- client.frequency( :query => 'sartre', :foo => 1 ).each { |result| puts result }
41
62
 
42
- # Links for first result in frequency table search
43
- client.frequency('sartre').first.links.each { |link| puts link }
63
+ # Frequency search
64
+ # Returns Philologic::Client::Frequency object
65
+ q = client.frequency('lion')
44
66
 
67
+ # Get results if present
68
+ if q.results?
69
+ q.results.each do |result|
70
+ # +result+ is a Philologic::Client::FrequencyRow object
71
+ end
72
+ end
45
73
 
46
- # Get/Set encoding
47
- encoding = client.encoding
48
- client.encoding = 'utf-8' # Default
49
74
 
50
- # Get/Set endpoint
51
- endpoint = client.endpoint
52
- client.endpoint = 'http://philologic.example.org'
75
+ # Collocation search
76
+ # TODO - Pending implementation update
53
77
 
54
78
  end
55
79
 
@@ -68,8 +92,6 @@ https://github.com/blairc/philologic-client/
68
92
 
69
93
  == To Do
70
94
 
71
- * Add +children+ to Philologic::Document
72
- * Add +parent+ to Philologic::Document
73
- * Unify (as much as possible) Philologic::Document, Philologic::Link, Philologic::TableRow
95
+ * Collocation searches
74
96
  * Cache results?
75
97
 
data/Rakefile CHANGED
@@ -22,3 +22,29 @@ RDoc::Task.new do |rdoc|
22
22
  rdoc.rdoc_files.include('README.rdoc', 'lib/**/*.rb', 'doc/*.txt')
23
23
  end
24
24
 
25
+ desc 'Update test data files'
26
+ task :update_test_data do
27
+ require 'net/http'
28
+ require 'uri'
29
+
30
+ endpoint = 'http://pantagruel.ci.uchicago.edu/philo4/shakespeare/dispatcher.py'
31
+ test_data_dir = File.join( File.dirname(__FILE__), 'test', 'data' )
32
+ test_data_ext = '.html'
33
+ {
34
+ '?title=' => 'bibliography',
35
+ '?q=lion&report=frequency&field=collocates' => 'collocation',
36
+ '?q=lion' => 'concordance',
37
+ '?q=lion&report=frequency' => 'frequency',
38
+ '1' => 'navigation',
39
+ '1/1' => 'object'
40
+ }.each_pair do |request, file|
41
+ fn = "#{test_data_dir}/#{file}#{test_data_ext}"
42
+ uri = URI.parse( "#{endpoint}/#{request}" )
43
+ puts "updating #{fn} from #{uri} ..."
44
+
45
+ http = Net::HTTP.new(uri.host, uri.port)
46
+ request = http.get(uri.request_uri)
47
+ File.open(fn, 'w') { |fh| fh.puts request.body }
48
+ end
49
+ end
50
+
@@ -0,0 +1,60 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Bibliography response returned by bibliography report.
7
+ #
8
+ # Extends Philologic::Client::Response
9
+ #
10
+ # = Usage
11
+ #
12
+ # # <div class='philologic_response'>
13
+ # # <p class='description'>Bibliography Report: 42 results.</p>
14
+ # # <ol class='philologic_cite_list'>
15
+ # # <li class='philologic_occurrence'>
16
+ # # <a href="./1/0/0/0/0" class='philologic_cite'>
17
+ # # <span class='philologic_property' title='author'>William Shakespeare</span>,
18
+ # # <i>
19
+ # # <span class='philologic_cite' title='title'>
20
+ # # The First Part of King Henry the Fourth
21
+ # # </span>
22
+ # # </i>
23
+ # # :
24
+ # # <span class='philologic_property' title='who'></span>
25
+ # # </a>
26
+ # # </li>
27
+ # # ...
28
+ # # </ol>
29
+ # # </div>
30
+ # biblio = Philologic::Client::Bibliography.new(html)
31
+ # biblio.titles.each do |title|
32
+ # ... Philologic::Client::Occurrence object ...
33
+ # end
34
+ #
35
+ class Bibliography < ::Philologic::Client::Response
36
+
37
+ #
38
+ # Get +Array+ of Philologic::Client::Occurrence objects
39
+ #
40
+ attr_reader :titles
41
+
42
+ #
43
+ # Initialize Philologic::Client::Bibliography object.
44
+ #
45
+ # Params:
46
+ # +document+:: Nokogiri document
47
+ # +client+:: (optional) Philologic::Client object or +nil+
48
+ #
49
+ def initialize(document, client = nil)
50
+ super
51
+
52
+ @properties = {} # TODO Sigh...
53
+ @titles = @doc.css('li.philologic_occurrence').collect { |o| Philologic::Client::Occurrence.new(o, client) }
54
+ end
55
+
56
+ end
57
+
58
+ end # class Client
59
+ end # module Philologic
60
+
@@ -0,0 +1,61 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic collocation table.
7
+ #
8
+ # Extends Philologic::Client::Response
9
+ #
10
+ class Collocation < ::Philologic::Client::Response
11
+
12
+ #
13
+ # Get +Array+ of raw +tr+ nodes (parsing into Philologic::Client::CollocationRow objects is not implemented yet)
14
+ #
15
+ attr_reader :results
16
+
17
+ #
18
+ # Initialize Philologic::Client::Collocation object.
19
+ #
20
+ # Params:
21
+ # +document+:: Nokogiri document
22
+ # +client+:: (optional) Philologic::Client object or +nil+
23
+ #
24
+ def initialize(document, client = nil)
25
+ super
26
+
27
+ #@results = []
28
+ #table = @doc.css('table.philologic_table').first
29
+ #unless table.nil?
30
+ # #@results = table.css('tr').select { |tr| tr.css('tr > td') }.collect { |tr| tr }
31
+ # @results = table.css('tr').collect { |tr| tr }
32
+ #end
33
+ @results = @doc.css('tr').collect { |tr| tr } # XXX
34
+ # <tr>
35
+ # <td>
36
+ # <tr><td width="25%">bear (8)</td><td width="25%">bear (5)</td><td width="25%">prey
37
+ # (5)</td></tr>
38
+ #
39
+ # </td>
40
+ # </tr>
41
+ # header = @doc.css('table.philologic_table > tr.freq_header_row').first
42
+ # unless header.nil?
43
+ # @label_header = header.css('th.freq_label_header').first.text
44
+ # @value_header = header.css('th.freq_value_header').first.text
45
+ # end
46
+ #
47
+ # @results = @doc.css('tr.freq_row').collect { |tr| Philologic::Client::CollocationRow.new(tr) }
48
+ end
49
+
50
+ #
51
+ # Does this collocation table contain results?
52
+ #
53
+ def results?
54
+ @results.size > 0 ? true : false
55
+ end
56
+
57
+ end
58
+
59
+ end # class Client
60
+ end # module Philologic
61
+
@@ -0,0 +1,39 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic concordance search results.
7
+ #
8
+ class Concordance < ::Philologic::Client::Response
9
+
10
+ #
11
+ # Get +Array+ of Philologic::Client::Occurrence objects
12
+ #
13
+ attr_reader :results
14
+
15
+ #
16
+ # Initialize Philologic::Client::Concordance object.
17
+ #
18
+ # Params:
19
+ # +document+:: Nokogiri document
20
+ # +client+:: (optional) Philologic::Client object or +nil+
21
+ #
22
+ def initialize(document, client = nil)
23
+ super
24
+
25
+ @results = @doc.css('li.philologic_occurrence').collect { |o| Philologic::Client::Occurrence.new(o, client) }
26
+ end
27
+
28
+ #
29
+ # Were any results found?
30
+ #
31
+ def results?
32
+ @results.size > 0 ? true : false
33
+ end
34
+
35
+ end
36
+
37
+ end # class Client
38
+ end # module Philologic
39
+
@@ -0,0 +1,65 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic document.
7
+ #
8
+ class Document < ::Philologic::Client::Response
9
+
10
+ #
11
+ # Get document HTML or +nil+
12
+ #
13
+ attr_reader :html
14
+
15
+ #
16
+ # Get +Array+ of Philologic::Client::Link objects
17
+ #
18
+ attr_reader :links
19
+
20
+ #
21
+ # Get document text or +nil+
22
+ #
23
+ attr_reader :text
24
+
25
+ #
26
+ # Initialize Philologic::Client::Document object.
27
+ #
28
+ # Params:
29
+ # +document+:: Nokogiri document
30
+ # +client+:: (optional) Philologic::Client object or +nil+
31
+ #
32
+ def initialize(document, client = nil)
33
+ super
34
+
35
+ @html = @text = nil
36
+ unless @doc.css('div.philologic_object').first.nil?
37
+ @text = @doc.css('div.philologic_object').children.text
38
+ @html = @doc.css('div.philologic_object').children.to_html
39
+ end
40
+
41
+ @links = []
42
+ @doc.css('div.philologic_navigation > a').each do |a|
43
+ @links << ::Philologic::Client::Link.new( a.attributes['href'].value, a.text.strip )
44
+ end
45
+ end
46
+
47
+ #
48
+ # Does this document contain navigation links?
49
+ #
50
+ def links?
51
+ @links.size > 0 ? true : false
52
+ end
53
+
54
+ #
55
+ # Does this document contain text?
56
+ #
57
+ def text?
58
+ @text ? true : false
59
+ end
60
+
61
+ end
62
+
63
+ end # class Client
64
+ end # module Philologic
65
+
@@ -0,0 +1,57 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic frequency table.
7
+ #
8
+ # Extends Philologic::Client::Response
9
+ #
10
+ class Frequency < ::Philologic::Client::Response
11
+
12
+ #
13
+ # Get label header or +nil+
14
+ #
15
+ attr_reader :label_header
16
+
17
+ #
18
+ # Get +Array+ of Philologic::Client::FrequencyRow objects
19
+ #
20
+ attr_reader :results
21
+
22
+ #
23
+ # Get value header or +nil+
24
+ #
25
+ attr_reader :value_header
26
+
27
+ #
28
+ # Initialize Philologic::Client::Frequency object.
29
+ #
30
+ # Params:
31
+ # +document+:: Nokogiri document
32
+ # +client+:: (optional) Philologic::Client object or +nil+
33
+ #
34
+ def initialize(document, client = nil)
35
+ super
36
+
37
+ header = @doc.css('table.philologic_table > tr.freq_header_row').first
38
+ unless header.nil?
39
+ @label_header = header.css('th.freq_label_header').first.text
40
+ @value_header = header.css('th.freq_value_header').first.text
41
+ end
42
+
43
+ @results = @doc.css('tr.freq_row').collect { |tr| Philologic::Client::FrequencyRow.new(tr) }
44
+ end
45
+
46
+ #
47
+ # Does this frequency table contain results?
48
+ #
49
+ def results?
50
+ @results.size > 0 ? true : false
51
+ end
52
+
53
+ end
54
+
55
+ end # class Client
56
+ end # module Philologic
57
+
@@ -0,0 +1,67 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic frequency table row.
7
+ #
8
+ # = Usage
9
+ #
10
+ # # <tr class='freq_row'>
11
+ # # <td class='freq_label'>
12
+ # # <a href='./?q=lion&title=A+Midsummer+Night%27s+Dream'>
13
+ # # A Midsummer Night's Dream
14
+ # # </a>
15
+ # # </td>
16
+ # # <td class='freq_value'>
17
+ # # 30
18
+ # # </td>
19
+ # # </tr>
20
+ #
21
+ # r = Philologic::Client::FrequencyRow.new(html)
22
+ # r.label # "A Midsummer Night's Dream"
23
+ # r.link # './?q=lion&title=A+Midsummer+Night%27s+Dream'
24
+ # r.value # "30"
25
+ #
26
+ class FrequencyRow
27
+
28
+ #
29
+ # Get label or +nil+
30
+ #
31
+ attr_reader :label
32
+
33
+ #
34
+ # Get link or +nil+
35
+ #
36
+ attr_reader :link
37
+
38
+ #
39
+ # Get value or +nil+
40
+ #
41
+ attr_reader :value
42
+
43
+ #
44
+ # Initialize Philologic::Client::FrequencyRow object.
45
+ #
46
+ # Params:
47
+ # +document+:: Nokogiri document
48
+ #
49
+ def initialize(document)
50
+ @label = @link = @value = nil
51
+
52
+ unless document.css('td.freq_label').first.nil?
53
+ @label = document.css('td.freq_label').first.text
54
+ end
55
+ unless document.css('a').first.nil?
56
+ @link = document.css('a').first.attributes['href'].value
57
+ end
58
+ unless document.css('td.freq_value').first.nil?
59
+ @value = document.css('td.freq_value').first.text
60
+ end
61
+ end
62
+
63
+ end
64
+
65
+ end # class Client
66
+ end # module Philologic
67
+
@@ -0,0 +1,37 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic link.
7
+ #
8
+ class Link
9
+
10
+ #
11
+ # Get link text or nil.
12
+ #
13
+ attr_reader :text
14
+
15
+ #
16
+ # Get link URL.
17
+ #
18
+ attr_reader :url
19
+
20
+ #
21
+ # Initialize Philologic::Client::Link object.
22
+ #
23
+ # Params:
24
+ # +url+:: URL
25
+ # +text+:: (optional) Text or +nil+
26
+ #
27
+ def initialize(url, text = nil)
28
+ raise('nil link') if url.nil?
29
+ @text = text
30
+ @url = url
31
+ end
32
+
33
+ end
34
+
35
+ end # class Client
36
+ end # module Philologic
37
+