philologic-client 0.0.11 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/HISTORY.rdoc +8 -0
  2. data/PhiloLogicResponseDocumentation.txt +86 -0
  3. data/README.rdoc +54 -32
  4. data/Rakefile +26 -0
  5. data/lib/philologic-client/bibliography.rb +60 -0
  6. data/lib/philologic-client/collocation.rb +61 -0
  7. data/lib/philologic-client/concordance.rb +39 -0
  8. data/lib/philologic-client/document.rb +65 -0
  9. data/lib/philologic-client/frequency.rb +57 -0
  10. data/lib/philologic-client/frequency_row.rb +67 -0
  11. data/lib/philologic-client/link.rb +37 -0
  12. data/lib/philologic-client/occurrence.rb +79 -0
  13. data/lib/philologic-client/response.rb +63 -0
  14. data/lib/philologic-client/version.rb +1 -1
  15. data/lib/philologic-client.rb +102 -286
  16. data/test/data/bibliography.html +182 -0
  17. data/test/data/collocation.html +2594 -0
  18. data/test/data/concordance.html +758 -0
  19. data/test/data/frequency.html +73 -0
  20. data/test/data/navigation.html +69 -0
  21. data/test/data/object.html +20 -0
  22. data/test/test_bibliography.rb +78 -0
  23. data/test/test_client.rb +861 -0
  24. data/test/test_collocation.rb +76 -0
  25. data/test/test_concordance.rb +83 -0
  26. data/test/test_document.rb +127 -0
  27. data/test/test_frequency.rb +78 -0
  28. data/test/test_occurrence.rb +66 -0
  29. data/test/test_response.rb +41 -0
  30. metadata +55 -36
  31. data/doc/PhiloLogicResponseTemplates.txt +0 -46
  32. data/test/data/collocation_links.html +0 -145
  33. data/test/data/collocation_sartre.html +0 -67
  34. data/test/data/doc_file.html +0 -396
  35. data/test/data/frequency_links.html +0 -145
  36. data/test/data/frequency_sartre.html +0 -67
  37. data/test/data/query_sartre.html +0 -151
  38. data/test/data/root_file.html +0 -1851
  39. data/test/test_philologic_client.rb +0 -558
  40. data/test/test_philologic_link.rb +0 -101
data/HISTORY.rdoc CHANGED
@@ -1,5 +1,13 @@
1
1
  = Philologic::Client History
2
2
 
3
+ == 2012-03-30 v0.0.13
4
+
5
+ * Supports all of the Philologic v4 specification except collocation tables
6
+
7
+ == 2012-02-13 v0.0.12
8
+
9
+ * Revert to parsing with +Nokogiri::HTML+, not +Nokogiri::XML+
10
+
3
11
  == 2012-02-09 v0.0.11
4
12
 
5
13
  * Added +Philologic::Link#text()+
@@ -0,0 +1,86 @@
1
+ ---Concordance Report: returned by fulltext searches---
2
+
3
+ <div class='philologic_response'>
4
+ <ol class='philologic_concordance'>
5
+ <li class='philologic_occurrence'>
6
+ <span class='hit_n'> {hit_number} </span>
7
+ <a href='{link_to_cited_object}' class='philologic_cite'>
8
+ <span class='philologic_property' title="{key}">{value}</span>
9
+ {more properties}
10
+ </a>
11
+ {possibly multiple links with their own properties--page images, etc.}
12
+ <div class='philologic_context'>{left context}
13
+ <span class='hilite'>{search term}</span>
14
+ …{possibly multiple separate hilite terms}…{right context}
15
+ </div>
16
+ </li>
17
+ {probably many occurrences}
18
+ </ol>
19
+ </div>
20
+
21
+
22
+ ---Bibliography Report: returned by metadata-only searches---
23
+
24
+ <div class='philologic_response'>
25
+ <ol class='philologic_cite_list'>
26
+ <li class='philologic_occurrence'>
27
+ <span class='hit_n'> {hit_number} </span>
28
+ <div class='philologic_cite'>
29
+ <a href='{link_to_cited_object}'>
30
+ <span class='philologic_property' title="{key}">{value}</span>
31
+ {more properties}
32
+ </a>
33
+ {possibly multiple links with their own properties--page images, etc.}
34
+ </div>
35
+ </li>
36
+ {probably many occurrences}
37
+ </ol>
38
+ </div>
39
+
40
+
41
+ ---Frequency Table: returned by frequency and collocation reports---
42
+ <div class='philologic_response'>
43
+ <table class='philologic_table'>
44
+ <tr class='freq_header_row'>
45
+ <th class='freq_checkbox_header'></th>
46
+ <th class='freq_label_header'> {facet_widget} </th>
47
+ <th class='freq_value_header'> {count} </th>
48
+ </tr>
49
+ <tr class='freq_row'>
50
+ <td class='freq_checkbox'><input type='checkbox'></input></td>
51
+ <td class='freq_label'> <a href='{link to concordance}'> {label} </a></td>
52
+ <td class='freq_value'> {count} </td>
53
+ </tr>
54
+ {probably many rows}
55
+ </table>
56
+ </div>
57
+
58
+
59
+ ---Object Viewer: returned by div and paragraph object access---
60
+
61
+ <div class='philologic_response'>
62
+ <div class='philologic_cite'>
63
+ <span class='philologic_property' title="{key}">{value}</span>
64
+ {more properties}
65
+ </div>
66
+ <div class='philologic_object'>
67
+ {various content}
68
+ <span class='hilite'>{hilit word}</span>
69
+ {possibly multiple hilit words}
70
+ </div>
71
+ </div>
72
+
73
+
74
+ ---Object Navigation: returned by document objects, links to children---
75
+
76
+ <div class='philologic_response'>
77
+ <div class='philologic_cite'>
78
+ <span class='philologic_property' title="{key}">{value}</span>
79
+ {more properties}
80
+ </div>
81
+ <div class='philologic_navigation'>
82
+ <a href='zzz'>[link to child object]</a>
83
+ [links to all child objects]
84
+ </div>
85
+ </div>
86
+
data/README.rdoc CHANGED
@@ -6,50 +6,74 @@
6
6
 
7
7
  Philologic::Client.new(endpoint) do |client|
8
8
 
9
- # Get children of root
10
- client.children.each { |child| puts child }
9
+ # Get/Set encoding
10
+ encoding = client.encoding
11
+ client.encoding = 'utf-8' # Default
11
12
 
12
- # Get document
13
- first = client.children.first
14
- doc = first.document
15
- html = doc.html
16
- txt = doc.text
13
+ # Get/Set endpoint
14
+ endpoint = client.endpoint
15
+ client.endpoint = 'http://philologic.example.org'
17
16
 
17
+ # Get bibliography
18
+ # +biblio+ is a Philologic::Client::Bibliography object
19
+ biblio = client.bibliography
20
+
21
+ # Get reference to first title
22
+ # +first+ is a Philologic::Client::Occurrence object
23
+ first = biblio.titles.first
18
24
 
19
- # Simple (concordance) search
20
- client.search('sartre').each { |result| puts result }
25
+ # Get first title
26
+ # +doc+ will be a Philologic::Client::Document object
27
+ doc = client.document( first['href'] )
21
28
 
22
- # Search by arbitrary key-value pairs
23
- client.search( :query => 'sartre', :foo => 1 ).each { |result| puts result }
29
+ # Get text if present
30
+ if doc.text?
31
+ txt = doc.text # Document text
32
+ html = doc.html # Document HTML
33
+ end
24
34
 
35
+ # Get links if present
36
+ if doc.links?
37
+ doc.links.each do |link|
38
+ # +link+ is Philologic::Client::Link object
39
+ link.url # Link URL
40
+ link.text # Link text
41
+ end
42
+ end
25
43
 
26
- # Simple collocation table search.
27
- client.collocation('sartre').each { |result| puts result }
44
+ # Get +Array+ of document property keys.
45
+ doc.keys
28
46
 
29
- # Collocation table search by arbitrary key-value pairs
30
- client.collocation( :query => 'sartre', :foo => 1 ).each { |result| puts result }
47
+ # Get document properties
48
+ doc.each { |p| puts "%s\t%s" % [ p, doc[p] ] }
31
49
 
32
- # Links for first result in collocation table search
33
- client.collocation('sartre').first.links.each { |link| puts link }
34
50
 
51
+ # Concordance search
52
+ # Returns Philologic::Client::Concordance object
53
+ q = client.concordance('lion')
35
54
 
36
- # Simple frequency table search.
37
- client.frequency('sartre').each { |result| puts result }
55
+ # Get results if present.
56
+ if q.results?
57
+ q.results.each do |result|
58
+ # +result+ is a Philologic::Client::Occurrence object
59
+ end
60
+ end
38
61
 
39
- # Frequency table search by arbitrary key-value pairs
40
- client.frequency( :query => 'sartre', :foo => 1 ).each { |result| puts result }
41
62
 
42
- # Links for first result in frequency table search
43
- client.frequency('sartre').first.links.each { |link| puts link }
63
+ # Frequency search
64
+ # Returns Philologic::Client::Frequency object
65
+ q = client.frequency('lion')
44
66
 
67
+ # Get results if present
68
+ if q.results?
69
+ q.results.each do |result|
70
+ # +result+ is a Philologic::Client::FrequencyRow object
71
+ end
72
+ end
45
73
 
46
- # Get/Set encoding
47
- encoding = client.encoding
48
- client.encoding = 'utf-8' # Default
49
74
 
50
- # Get/Set endpoint
51
- endpoint = client.endpoint
52
- client.endpoint = 'http://philologic.example.org'
75
+ # Collocation search
76
+ # TODO - Pending implementation update
53
77
 
54
78
  end
55
79
 
@@ -68,8 +92,6 @@ https://github.com/blairc/philologic-client/
68
92
 
69
93
  == To Do
70
94
 
71
- * Add +children+ to Philologic::Document
72
- * Add +parent+ to Philologic::Document
73
- * Unify (as much as possible) Philologic::Document, Philologic::Link, Philologic::TableRow
95
+ * Collocation searches
74
96
  * Cache results?
75
97
 
data/Rakefile CHANGED
@@ -22,3 +22,29 @@ RDoc::Task.new do |rdoc|
22
22
  rdoc.rdoc_files.include('README.rdoc', 'lib/**/*.rb', 'doc/*.txt')
23
23
  end
24
24
 
25
desc 'Update test data files'
task :update_test_data do
  # Loaded lazily so that the other Rake tasks do not pay for them.
  require 'net/http'
  require 'uri'

  # PhiloLogic endpoint the fixtures are downloaded from.
  endpoint      = 'http://pantagruel.ci.uchicago.edu/philo4/shakespeare/dispatcher.py'
  test_data_dir = File.join( File.dirname(__FILE__), 'test', 'data' )
  test_data_ext = '.html'

  # Request suffix (appended to the endpoint after a "/") => fixture base name.
  {
    '?title='                                   => 'bibliography',
    '?q=lion&report=frequency&field=collocates' => 'collocation',
    '?q=lion'                                   => 'concordance',
    '?q=lion&report=frequency'                  => 'frequency',
    '1'                                         => 'navigation',
    '1/1'                                       => 'object'
  }.each_pair do |request, file|
    fn  = "#{test_data_dir}/#{file}#{test_data_ext}"
    uri = URI.parse( "#{endpoint}/#{request}" )
    puts "updating #{fn} from #{uri} ..."

    response = Net::HTTP.new(uri.host, uri.port).get(uri.request_uri)

    # Never replace a fixture with an error or redirect page: abort before
    # the file is opened for writing (opening with 'w' truncates it).
    unless response.is_a?(Net::HTTPSuccess)
      raise "unable to fetch #{uri}: #{response.code} #{response.message}"
    end

    File.open(fn, 'w') { |fh| fh.puts response.body }
  end
end
50
+
@@ -0,0 +1,60 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Result of a PhiloLogic bibliography report.
    #
    # Subclass of Philologic::Client::Response.
    #
    # = Usage
    #
    #  # <div class='philologic_response'>
    #  #   <p class='description'>Bibliography Report: 42 results.</p>
    #  #   <ol class='philologic_cite_list'>
    #  #     <li class='philologic_occurrence'>
    #  #       <a href="./1/0/0/0/0" class='philologic_cite'>
    #  #         <span class='philologic_property' title='author'>William Shakespeare</span>,
    #  #         <i>
    #  #           <span class='philologic_cite' title='title'>
    #  #             The First Part of King Henry the Fourth
    #  #           </span>
    #  #         </i>
    #  #         :
    #  #         <span class='philologic_property' title='who'></span>
    #  #       </a>
    #  #     </li>
    #  #     ...
    #  #   </ol>
    #  # </div>
    #  biblio = Philologic::Client::Bibliography.new(html)
    #  biblio.titles.each do |title|
    #    ... Philologic::Client::Occurrence object ...
    #  end
    #
    class Bibliography < ::Philologic::Client::Response

      #
      # +Array+ of Philologic::Client::Occurrence objects, one per
      # <tt>li.philologic_occurrence</tt> element in the response.
      #
      attr_reader :titles

      #
      # Build a Philologic::Client::Bibliography from a parsed response.
      #
      # Params:
      # +document+:: Nokogiri document
      # +client+:: (optional) Philologic::Client object or +nil+; handed on
      #            to every Philologic::Client::Occurrence that is created
      #
      def initialize(document, client = nil)
        # Bare +super+ forwards both arguments to the Response initializer
        # (presumably the place where @doc is set -- not visible here).
        super

        # A bibliography has no properties of its own, so whatever the
        # superclass collected is discarded.
        @properties = {} # TODO Sigh...

        entries = @doc.css('li.philologic_occurrence')
        @titles = entries.map do |entry|
          Philologic::Client::Occurrence.new(entry, client)
        end
      end

    end

  end # class Client
end # module Philologic
60
+
@@ -0,0 +1,61 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Philologic collocation table.
    #
    # Subclass of Philologic::Client::Response.
    #
    # Row parsing is still a placeholder: every +tr+ element of the
    # response is exposed as-is, without being wrapped in a row object.
    #
    class Collocation < ::Philologic::Client::Response

      #
      # +Array+ holding every +tr+ node found in the response.
      #
      attr_reader :results

      #
      # Build a Philologic::Client::Collocation from a parsed response.
      #
      # Params:
      # +document+:: Nokogiri document
      # +client+:: (optional) Philologic::Client object or +nil+
      #
      def initialize(document, client = nil)
        super

        # XXX Interim behaviour -- collect all rows, wherever they appear.
        #
        # The collocation markup nests rows inside a cell, e.g.
        #
        #   <tr>
        #     <td>
        #       <tr><td width="25%">bear (8)</td><td width="25%">bear (5)</td><td width="25%">prey
        #       (5)</td></tr>
        #     </td>
        #   </tr>
        #
        # so the selectors used for frequency tables
        # ('table.philologic_table > tr.freq_header_row', 'tr.freq_row')
        # are not applied here yet.
        @results = @doc.css('tr').to_a
      end

      #
      # +true+ when at least one row was collected, +false+ otherwise.
      #
      def results?
        !@results.empty?
      end

    end

  end # class Client
end # module Philologic
61
+
@@ -0,0 +1,39 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Results of a Philologic concordance search.
    #
    # Subclass of Philologic::Client::Response.
    #
    class Concordance < ::Philologic::Client::Response

      #
      # +Array+ of Philologic::Client::Occurrence objects, one per
      # <tt>li.philologic_occurrence</tt> element in the response.
      #
      attr_reader :results

      #
      # Build a Philologic::Client::Concordance from a parsed response.
      #
      # Params:
      # +document+:: Nokogiri document
      # +client+:: (optional) Philologic::Client object or +nil+; handed on
      #            to every Philologic::Client::Occurrence that is created
      #
      def initialize(document, client = nil)
        super

        hits = @doc.css('li.philologic_occurrence')
        @results = hits.map do |hit|
          Philologic::Client::Occurrence.new(hit, client)
        end
      end

      #
      # +true+ when the search produced at least one occurrence.
      #
      def results?
        !@results.empty?
      end

    end

  end # class Client
end # module Philologic
39
+
@@ -0,0 +1,65 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Philologic document.
    #
    # Subclass of Philologic::Client::Response.  A document may carry text
    # (a <tt>div.philologic_object</tt> element), navigation links
    # (<tt>div.philologic_navigation > a</tt> elements), both, or neither.
    #
    class Document < ::Philologic::Client::Response

      #
      # Get document HTML or +nil+
      #
      attr_reader :html

      #
      # Get +Array+ of Philologic::Client::Link objects
      #
      attr_reader :links

      #
      # Get document text or +nil+
      #
      attr_reader :text

      #
      # Initialize Philologic::Client::Document object.
      #
      # Params:
      # +document+:: Nokogiri document
      # +client+:: (optional) Philologic::Client object or +nil+
      #
      def initialize(document, client = nil)
        super

        @html = @text = nil

        # Run the selector once and reuse the node set for both renderings.
        object = @doc.css('div.philologic_object')
        unless object.first.nil?
          content = object.children
          @text = content.text
          @html = content.to_html
        end

        @links = []
        @doc.css('div.philologic_navigation > a').each do |a|
          href = a['href']
          # An anchor without a target (e.g. a named anchor) is not a
          # navigation link; skip it instead of failing on the missing
          # attribute.
          next if href.nil?
          @links << ::Philologic::Client::Link.new( href, a.text.strip )
        end
      end

      #
      # Does this document contain navigation links?
      #
      def links?
        !@links.empty?
      end

      #
      # Does this document contain text?
      #
      def text?
        !@text.nil?
      end

    end

  end # class Client
end # module Philologic
65
+
@@ -0,0 +1,57 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Philologic frequency table.
    #
    # Extends Philologic::Client::Response
    #
    class Frequency < ::Philologic::Client::Response

      #
      # Get label header or +nil+
      #
      attr_reader :label_header

      #
      # Get +Array+ of Philologic::Client::FrequencyRow objects
      #
      attr_reader :results

      #
      # Get value header or +nil+
      #
      attr_reader :value_header

      #
      # Initialize Philologic::Client::Frequency object.
      #
      # Params:
      # +document+:: Nokogiri document
      # +client+:: (optional) Philologic::Client object or +nil+
      #
      def initialize(document, client = nil)
        super

        # Both headers stay +nil+ when the table has no header row or the
        # header row lacks the corresponding cell.
        @label_header = @value_header = nil

        header = @doc.css('table.philologic_table > tr.freq_header_row').first
        unless header.nil?
          @label_header = header_text(header, 'th.freq_label_header')
          @value_header = header_text(header, 'th.freq_value_header')
        end

        @results = @doc.css('tr.freq_row').map { |tr| Philologic::Client::FrequencyRow.new(tr) }
      end

      #
      # Does this frequency table contain results?
      #
      def results?
        !@results.empty?
      end

      private

      #
      # Text of the first cell in +row+ matching +selector+, or +nil+ when
      # no such cell exists.
      #
      def header_text(row, selector)
        cell = row.css(selector).first
        cell.nil? ? nil : cell.text
      end

    end

  end # class Client
end # module Philologic
57
+
@@ -0,0 +1,67 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # Philologic frequency table row.
    #
    # = Usage
    #
    #  # <tr class='freq_row'>
    #  #   <td class='freq_label'><a href='./?q=lion&title=A+Midsummer+Night%27s+Dream'>A Midsummer Night's Dream</a></td>
    #  #   <td class='freq_value'>30</td>
    #  # </tr>
    #
    #  r = Philologic::Client::FrequencyRow.new(html)
    #  r.label # "A Midsummer Night's Dream"
    #  r.link  # './?q=lion&title=A+Midsummer+Night%27s+Dream'
    #  r.value # "30"
    #
    # Cell text is returned exactly as found in the markup; surrounding
    # whitespace is not stripped.
    #
    class FrequencyRow

      #
      # Get label or +nil+
      #
      attr_reader :label

      #
      # Get link or +nil+
      #
      attr_reader :link

      #
      # Get value or +nil+
      #
      attr_reader :value

      #
      # Initialize Philologic::Client::FrequencyRow object.
      #
      # Params:
      # +document+:: Nokogiri node for the row; only +css+ is called on it
      #
      # Each attribute is +nil+ when the element it is read from is absent.
      # +link+ is also +nil+ when the first anchor has no +href+ attribute.
      #
      def initialize(document)
        # Query each selector once and keep the first match (or nil).
        label_cell = document.css('td.freq_label').first
        anchor     = document.css('a').first
        value_cell = document.css('td.freq_value').first

        @label = label_cell.nil? ? nil : label_cell.text
        # Node#[] yields nil for a missing attribute, so an anchor without
        # href no longer raises.
        @link  = anchor.nil? ? nil : anchor['href']
        @value = value_cell.nil? ? nil : value_cell.text
      end

    end

  end # class Client
end # module Philologic
67
+
@@ -0,0 +1,37 @@
1
+
2
module Philologic # :nodoc:
  class Client # :nodoc:

    #
    # A Philologic link: a URL together with optional link text.
    #
    class Link

      #
      # Link text, or +nil+ when none was supplied.
      #
      attr_reader :text

      #
      # Link URL (never +nil+).
      #
      attr_reader :url

      #
      # Build a Philologic::Client::Link.
      #
      # Params:
      # +url+:: URL
      # +text+:: (optional) Text or +nil+
      #
      # Raises +RuntimeError+ ("nil link") when +url+ is +nil+.
      #
      def initialize(url, text = nil)
        raise 'nil link' if url.nil?

        @url, @text = url, text
      end

    end

  end # class Client
end # module Philologic
37
+