philologic-client 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/HISTORY.rdoc +8 -0
  2. data/PhiloLogicResponseDocumentation.txt +86 -0
  3. data/README.rdoc +54 -32
  4. data/Rakefile +26 -0
  5. data/lib/philologic-client/bibliography.rb +60 -0
  6. data/lib/philologic-client/collocation.rb +61 -0
  7. data/lib/philologic-client/concordance.rb +39 -0
  8. data/lib/philologic-client/document.rb +65 -0
  9. data/lib/philologic-client/frequency.rb +57 -0
  10. data/lib/philologic-client/frequency_row.rb +67 -0
  11. data/lib/philologic-client/link.rb +37 -0
  12. data/lib/philologic-client/occurrence.rb +79 -0
  13. data/lib/philologic-client/response.rb +63 -0
  14. data/lib/philologic-client/version.rb +1 -1
  15. data/lib/philologic-client.rb +102 -286
  16. data/test/data/bibliography.html +182 -0
  17. data/test/data/collocation.html +2594 -0
  18. data/test/data/concordance.html +758 -0
  19. data/test/data/frequency.html +73 -0
  20. data/test/data/navigation.html +69 -0
  21. data/test/data/object.html +20 -0
  22. data/test/test_bibliography.rb +78 -0
  23. data/test/test_client.rb +861 -0
  24. data/test/test_collocation.rb +76 -0
  25. data/test/test_concordance.rb +83 -0
  26. data/test/test_document.rb +127 -0
  27. data/test/test_frequency.rb +78 -0
  28. data/test/test_occurrence.rb +66 -0
  29. data/test/test_response.rb +41 -0
  30. metadata +55 -36
  31. data/doc/PhiloLogicResponseTemplates.txt +0 -46
  32. data/test/data/collocation_links.html +0 -145
  33. data/test/data/collocation_sartre.html +0 -67
  34. data/test/data/doc_file.html +0 -396
  35. data/test/data/frequency_links.html +0 -145
  36. data/test/data/frequency_sartre.html +0 -67
  37. data/test/data/query_sartre.html +0 -151
  38. data/test/data/root_file.html +0 -1851
  39. data/test/test_philologic_client.rb +0 -558
  40. data/test/test_philologic_link.rb +0 -101
@@ -0,0 +1,79 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Philologic citation occurrence.
7
+ #
8
+ # Extends Philologic::Client::Response
9
+ #
10
+ # = Usage
11
+ #
12
+ # # <li class='philologic_occurrence'>
13
+ # # <a href="./1/0/0/0/0" class='philologic_cite'>
14
+ # # <span class='philologic_property' title='author'>William Shakespeare</span>,
15
+ # # <i>
16
+ # # <span class='philologic_cite' title='title'>
17
+ # # The First Part of King Henry the Fourth
18
+ # # </span>
19
+ # # </i>
20
+ # # :
21
+ # # <span class='philologic_property' title='who'></span>
22
+ # # </a>
23
+ # # </li>
24
+ # o = Philologic::Client::Occurrence.new(html)
25
+ # o['author'] # 'William Shakespeare'
26
+ # o['title'] # 'The First Part of King Henry the Fourth'
27
+ # o['href'] # './1/0/0/0/0'
28
+ #
29
+ class Occurrence < ::Philologic::Client::Response
30
+
31
+ #
32
+ # Get document HTML or +nil+
33
+ #
34
+ attr_reader :html
35
+
36
+ #
37
+ # Get document text or +nil+
38
+ #
39
+ attr_reader :text
40
+
41
+ #
42
+ # Initialize Philologic::Client::Occurrence object.
43
+ #
44
+ # Params:
45
+ # +document+:: Nokogiri document
46
+ # +client+:: (optional) Philologic::Client object or +nil+
47
+ #
48
+ def initialize(document, client = nil)
49
+ super
50
+
51
+ @doc.css('a').each do |a|
52
+ @properties['href'] = a.attributes['href'].children.text
53
+ end
54
+ @doc.css('span.hit_n').each do |span|
55
+ @properties['hit_n'] = span.children.text.to_i
56
+ end
57
+ @doc.css('span.philologic_cite').each do |cite|
58
+ @properties[ cite.attributes['title'].value ] = cite.children.text
59
+ end
60
+
61
+ @html = @text = nil
62
+ unless @doc.css('div.philologic_context').first.nil?
63
+ @text = @doc.css('div.philologic_context').children.text
64
+ @html = @doc.css('div.philologic_context').children.to_html
65
+ end
66
+ end
67
+
68
+ #
69
+ # Does this document contain text?
70
+ #
71
+ def text?
72
+ @text ? true : false
73
+ end
74
+
75
+ end
76
+
77
+ end # class Client
78
+ end # module Philologic
79
+
@@ -0,0 +1,63 @@
1
+
2
+ module Philologic # :nodoc:
3
+ class Client # :nodoc:
4
+
5
+ #
6
+ # Generic server response.
7
+ #
8
+ # Extended by Philologic::Client::Bibliography, Philologic::Client::Collocation,
9
+ # Philologic::Client::Concordance, Philologic::Client::Document, Philologic::Client::Frequency
10
+ # and Philologic::Client::Occurrence
11
+ #
12
+ class Response
13
+ include Enumerable
14
+
15
+ #
16
+ # Get Philologic::Client or +nil+
17
+ #
18
+ attr_reader :client
19
+
20
+ #
21
+ # Initialize Philologic::Client::Response object.
22
+ #
23
+ # Params:
24
+ # +document+:: Nokogiri document
25
+ # +client+:: (optional) Philologic::Client object or +nil+
26
+ #
27
+ def initialize(document, client = nil)
28
+ raise('nil document') if document.nil?
29
+ @client = client
30
+ @doc = document
31
+
32
+ @properties = {}
33
+ @doc.css('span.philologic_property').each do |p|
34
+ @properties[ p.attributes['title'].value ] = p.children.text
35
+ end
36
+ end
37
+
38
+ #
39
+ # Returns value of attribute +key+ or +nil+.
40
+ #
41
+ def [](key)
42
+ @properties[key]
43
+ end
44
+
45
+ #
46
+ # Yield successive property keys.
47
+ #
48
+ def each(&block)
49
+ keys.each { |k| block.call(k) }
50
+ end
51
+
52
+ #
53
+ # +Array+ of sorted property keys.
54
+ #
55
+ def keys
56
+ @properties.keys.sort
57
+ end
58
+
59
+ end
60
+
61
+ end # class Client
62
+ end # module Philologic
63
+
@@ -5,6 +5,6 @@ module Philologic
5
5
  #
6
6
  # Philologic::Client version
7
7
  #
8
- VERSION = '0.0.11'
8
+ VERSION = '0.0.13'
9
9
  end
10
10
  end
@@ -1,10 +1,20 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'philologic-client/version'
4
-
5
3
  require 'nokogiri'
6
4
  require 'open-uri'
7
5
 
6
+ require 'philologic-client/link'
7
+ require 'philologic-client/frequency_row'
8
+ require 'philologic-client/response'
9
+ require 'philologic-client/version'
10
+
11
+ require 'philologic-client/bibliography'
12
+ require 'philologic-client/collocation'
13
+ require 'philologic-client/concordance'
14
+ require 'philologic-client/document'
15
+ require 'philologic-client/frequency'
16
+ require 'philologic-client/occurrence'
17
+
8
18
  #
9
19
  # = Philologic::Client - Ruby client for interacting with the Philologic API.
10
20
  #
@@ -14,50 +24,74 @@ require 'open-uri'
14
24
  #
15
25
  # Philologic::Client.new(endpoint) do |client|
16
26
  #
17
- # # Get children of root
18
- # client.children.each { |child| puts child }
27
+ # # Get/Set encoding
28
+ # encoding = client.encoding
29
+ # client.encoding = 'utf-8' # Default
19
30
  #
20
- # # Get document
21
- # first = client.children.first
22
- # doc = first.document
23
- # html = doc.html
24
- # txt = doc.text
31
+ # # Get/Set endpoint
32
+ # endpoint = client.endpoint
33
+ # client.endpoint = 'http://philologic.example.org'
25
34
  #
35
+ # # Get bibliography
36
+ # # +biblio+ is a Philologic::Client::Bibliography object
37
+ # biblio = client.bibliography
38
+ #
39
+ # # Get reference to first title
40
+ # # +first+ is a Philologic::Client::Occurrence object
41
+ # first = biblio.titles.first
26
42
  #
27
- # # Simple (concordance) search
28
- # client.search('sartre').each { |result| puts result }
43
+ # # Get first title
44
+ # # +doc+ will be a Philologic::Client::Document object
45
+ # doc = client.document( first['href'] )
29
46
  #
30
- # # Search by arbitrary key-value pairs
31
- # client.search( :query => 'sartre', :foo => 1 ).each { |result| puts result }
47
+ # # Get text if present
48
+ # if doc.text?
49
+ # txt = doc.text # Document text
50
+ # html = doc.html # Document HTML
51
+ # end
32
52
  #
53
+ # # Get links if present
54
+ # if doc.links?
55
+ # doc.links.each do |link|
56
+ # # +link+ is Philologic::Client::Link object
57
+ # link.url # Link URL
58
+ # link.text # Link text
59
+ # end
60
+ # end
33
61
  #
34
- # # Simple collocation table search.
35
- # client.collocation('sartre').each { |result| puts result }
62
+ # # Get +Array+ of document property keys.
63
+ # doc.keys
36
64
  #
37
- # # Collocation table search by arbitrary key-value pairs
38
- # client.collocation( :query => 'sartre', :foo => 1 ).each { |result| puts result }
65
+ # # Get document properties
66
+ # doc.each { |p| puts "%s\t%s" % [ p, doc[p] ] }
39
67
  #
40
- # # Links for first result in collocation table search
41
- # client.collocation('sartre').first.links.each { |link| puts link }
42
68
  #
69
+ # # Concordance search
70
+ # # Returns Philologic::Client::Concordance object
71
+ # q = client.concordance('lion')
43
72
  #
44
- # # Simple frequency table search.
45
- # client.frequency('sartre').each { |result| puts result }
73
+ # # Get results if present.
74
+ # if q.results?
75
+ # q.results.each do |result|
76
+ # # +result+ is a Philologic::Client::Occurrence object
77
+ # end
78
+ # end
46
79
  #
47
- # # Frequency table search by arbitrary key-value pairs
48
- # client.frequency( :query => 'sartre', :foo => 1 ).each { |result| puts result }
49
80
  #
50
- # # Links for first result in frequency table search
51
- # client.frequency('sartre').first.links.each { |link| puts link }
81
+ # # Frequency search
82
+ # # Returns Philologic::Client::Frequency object
83
+ # q = client.frequency('lion')
52
84
  #
85
+ # # Get results if present
86
+ # if q.results?
87
+ # q.results.each do |result|
88
+ # # +result+ is a Philologic::Client::FrequencyRow object
89
+ # end
90
+ # end
53
91
  #
54
- # # Get/Set encoding
55
- # encoding = client.encoding
56
- # client.encoding = 'utf-8' # Default
57
92
  #
58
- # # Get/Set endpoint
59
- # endpoint = client.endpoint
60
- # client.endpoint = 'http://philologic.example.org'
93
+ # # Collocation search
94
+ # # TODO - Pending implementation update
61
95
  #
62
96
  # end
63
97
  #
@@ -76,9 +110,7 @@ require 'open-uri'
76
110
  #
77
111
  # == To Do
78
112
  #
79
- # * Add +children+ to Philologic::Document
80
- # * Add +parent+ to Philologic::Document
81
- # * Unify (as much as possible) Philologic::Document, Philologic::Link, Philologic::TableRow
113
+ # * Collocation searches
82
114
  # * Cache results?
83
115
  #
84
116
  module Philologic # :nodoc:
@@ -116,71 +148,51 @@ module Philologic # :nodoc:
116
148
  self
117
149
  end
118
150
 
151
+ #
152
+ # Returns Philologic::Client::Bibliography object or +nil+
119
153
  #
120
- # Return +Array+ of Philologic::Link objects.
121
- #
122
- # Params:
123
- # +path+:: Return children of this path (defaults to +/+)
124
- #
125
- def children( path = '/' )
126
- doc = _get(path)
127
- return doc.kind_of?(Array) ? doc : nil
154
+ def bibliography
155
+ result = _query( 'title' => '' )
156
+ return result.kind_of?(Philologic::Client::Bibliography) ? result : nil
128
157
  end
129
158
 
130
159
  #
131
- # Return +Array+ of Philologic::TableRow objects.
132
- #
133
- # *NOTE:* Currently only returns the first page of results by default.
160
+ # Return Philologic::Client::Concordance object or +nil+.
134
161
  #
135
162
  # Params:
136
- # +query+:: Search for this query +String+ or +Hash+ key-value pairs.
163
+ # +query+:: Search for this query +String+, or a +Hash+ of query parameters.
137
164
  #
138
- def collocation(query)
139
- raise('no query specified') if ( query.nil? || !( [ Hash, String ].include?( query.class ) )) || query.empty?
140
- defaults = { :report => 'frequency', :field => 'collocates' }
141
- doc = search( query.kind_of?(String) ? defaults.merge(:query => query) : defaults.merge(query) )
142
- return doc.kind_of?(Array) ? doc : nil
165
+ def concordance(query)
166
+ raise('nil query') if ( query.nil? || !( [ Hash, String ].include?( query.class ) )) || query.empty?
167
+ defaults = {}
168
+ r = _query( query.kind_of?(String) ? defaults.merge( :q => query ) : defaults.merge(query) )
169
+ return r.kind_of?(Philologic::Client::Concordance) ? r : nil
143
170
  end
144
171
 
145
172
  #
146
- # Return Philologic::Document object or +nil+.
173
+ # Return object extending Philologic::Client::Document class or +nil+.
147
174
  #
148
175
  # Params:
149
- # +path+:: Return document at this path (defaults to +/+)
176
+ # +path+:: Return document at this path
150
177
  #
151
- def document( path = '/' )
178
+ def document(path)
152
179
  doc = _get(path)
153
- return doc.kind_of?(::Philologic::Document) ? doc : nil
180
+ doc.kind_of?(::Philologic::Client::Document) ? doc : nil
154
181
  end
155
182
 
156
183
  #
157
- # Return +Array+ of Philologic::TableRow objects.
158
- #
159
- # *NOTE:* Currently only returns the first page of results by default.
184
+ # Return Philologic::Client::Frequency object or +nil+.
160
185
  #
161
186
  # Params:
162
- # +query+:: Search for this query +String+ or +Hash+ key-value pairs.
187
+ # +query+:: Search for this query +String+, or a +Hash+ of query parameters.
163
188
  #
164
189
  def frequency(query)
165
- raise('no query specified') if ( query.nil? || !( [ Hash, String ].include?( query.class ) )) || query.empty?
190
+ raise('nil query') if ( query.nil? || !( [ Hash, String ].include?( query.class ) )) || query.empty?
166
191
  defaults = { :report => 'frequency' }
167
- doc = search( query.kind_of?(String) ? defaults.merge(:query => query) : defaults.merge(query) )
168
- return doc.kind_of?(Array) ? doc : nil
192
+ r = _query( query.kind_of?(String) ? defaults.merge( :q => query ) : defaults.merge(query) )
193
+ return r.kind_of?(Philologic::Client::Frequency) ? r : nil
169
194
  end
170
195
 
171
- #
172
- # Return +Array+ of Philologic::Link objects.
173
- #
174
- # *NOTE:* Currently only returns the first page of results by default.
175
- #
176
- # Params:
177
- # +query+:: Search for this query +String+ or +Hash+ key-value pairs.
178
- #
179
- def search(query)
180
- raise('no query specified') if ( query.nil? || !( [ Hash, String ].include?( query.class ) )) || query.empty?
181
- doc = _query( query.kind_of?(String) ? { :query => query } : query )
182
- return doc.kind_of?(Array) ? doc : nil
183
- end
184
196
 
185
197
  private
186
198
 
@@ -199,7 +211,7 @@ module Philologic # :nodoc:
199
211
  # TODO Use +URI+
200
212
  raise('no endpoint specified') if @endpoint.nil?
201
213
  uri = "#{ @endpoint }#{ @endpoint.end_with?('/') ? '' : '/' }#{path}".gsub( %r{/+$}, '/' )
202
- _parse( Nokogiri::XML( open(uri).read, nil, @encoding ) )
214
+ _parse( Nokogiri::HTML( open(uri).read, nil, @encoding ) )
203
215
  end
204
216
 
205
217
  #
@@ -207,31 +219,20 @@ module Philologic # :nodoc:
207
219
  #
208
220
  # TODO This needs some love.
209
221
  def _parse(doc)
210
- if !doc.css('div.philologic_cite_list').first.nil?
211
- children = []
212
- doc.css('div.philologic_cite_list').css('a.philologic_cite').each do |cite|
213
- children << Philologic::Link.new(self, cite) # TODO Why not a select?
214
- end
215
- return children
216
- elsif !doc.css('div.philologic_concordance').first.nil?
217
- results = []
218
- doc.css('div.philologic_concordance').css('li.philologic_occurence').each do |occurence|
219
- results << Philologic::Link.new(self, occurence) # TODO Why not a select?
220
- end
221
- return results
222
- elsif !doc.css('div.philologic_object').first.nil?
223
- return Philologic::Document.new( doc.css('div.philologic_object').first )
224
- elsif !doc.css('table.philologic_frequency').first.nil?
225
- results = []
226
- # Results are keyed off of this field.
227
- field = doc.css('table.philologic_frequency').first.attributes['title'].value
228
- doc.css('table.philologic_frequency').css('tr.philologic_frequency_row').each do |row|
229
- results << Philologic::TableRow.new(self, field, row)
230
- end
231
- return results
232
- # TODO else
233
- # warn "WARNING: could not parse document"
222
+ return nil if doc.css('div.philologic_response').first.nil?
223
+
224
+ if doc.css('ol.philologic_cite_list').first
225
+ return Philologic::Client::Bibliography.new(doc)
226
+ elsif doc.css('ol.philologic_concordance').first
227
+ return Philologic::Client::Concordance.new(doc)
228
+ elsif doc.css('div.philologic_navigation').first
229
+ return Philologic::Client::Document.new(doc)
230
+ elsif doc.css('div.philologic_object').first
231
+ return Philologic::Client::Document.new(doc)
232
+ elsif doc.css('table.philologic_table > tr.freq_header_row').first
233
+ return Philologic::Client::Frequency.new(doc)
234
234
  end
235
+
235
236
  nil
236
237
  end
237
238
 
@@ -244,190 +245,5 @@ module Philologic # :nodoc:
244
245
 
245
246
  end # class Philologic::Client
246
247
 
247
-
248
- #
249
- # Philologic document.
250
- #
251
- class Document
252
-
253
- #
254
- # Initialize Philologic::Document object.
255
- #
256
- # Params:
257
- # +document+:: Nokogiri document
258
- #
259
- def initialize(document)
260
- @doc = document
261
- @text = nil
262
- end
263
-
264
- #
265
- # Returns value of attribute +key+ or +nil+.
266
- #
267
- def [](key)
268
- @doc.attributes.key?(key) ? @doc.attributes[key].value : nil
269
- end
270
-
271
- #
272
- # Returns document HTML.
273
- #
274
- def html
275
- @doc.css('div.context_container').to_html
276
- end
277
-
278
- #
279
- # Returns document text.
280
- #
281
- def text
282
- @text = @doc.text.lstrip.rstrip if @text.nil?
283
- @text
284
- end
285
-
286
- end # class Philologic::Document
287
-
288
-
289
- #
290
- # Philologic link.
291
- #
292
- class Link
293
-
294
- #
295
- # Initialize Philologic::Link object.
296
- #
297
- # Params:
298
- # +client+:: Philologic::Client object
299
- # +document+:: Nokogiri document
300
- #
301
- def initialize(client, document)
302
- raise('nil client') if client.nil?
303
- raise('nil document') if document.nil?
304
- @client = client
305
- @doc = document
306
- @properties = {}
307
-
308
- if !@doc.css('a.philologic_cite').first.nil?
309
- @doc.css('a.philologic_cite').each do |a|
310
- a.attributes.keys.each { |k| @properties[k] = a.attributes[k].value }
311
- end
312
- end
313
- if @doc.respond_to?(:attributes)
314
- @doc.attributes.keys.each { |k| @properties[k] = @doc.attributes[k].value }
315
- end
316
- @doc.css('span.philologic_property').each do |span|
317
- @properties[ span.attributes['title'].value ] = span.children.text
318
- end
319
- end
320
-
321
- #
322
- # Returns value of attribute +key+ or +nil+.
323
- #
324
- def [](key)
325
- @properties[key]
326
- end
327
-
328
- #
329
- # Returns Philologic::Document referenced by this Philologic::Link object.
330
- #
331
- def document
332
- @client.document( self['href'] )
333
- end
334
-
335
- #
336
- # Returns Philologic::Link text.
337
- #
338
- def text
339
- @doc.text.strip
340
- end
341
-
342
- #
343
- # Return String representation of object.
344
- #
345
- def to_s
346
- [ self['href'], self['title'], self['author'] ].join(' | ')
347
- end
348
-
349
- end # class Philologic::Link
350
-
351
-
352
- #
353
- # Philologic table row.
354
- #
355
- class TableRow
356
-
357
- #
358
- # Get +count+
359
- #
360
- attr_reader :count
361
-
362
- #
363
- # Get +field+
364
- #
365
- attr_reader :field
366
-
367
- #
368
- # Get +href+
369
- #
370
- attr_reader :href
371
-
372
- #
373
- # Get +text+
374
- #
375
- attr_reader :text
376
-
377
-
378
- #
379
- # Initialize Philologic::TableRow object.
380
- #
381
- # Params:
382
- # +client+:: Philologic::Client object
383
- # +field+:: Row is keyed off of this field
384
- # +document+:: Nokogiri document
385
- #
386
- def initialize(client, field, document)
387
- @client = client
388
- @field = field
389
- @doc = document
390
-
391
- @doc.css('td.philologic_frequency_key').each do |key|
392
- key.css('a').each do |a|
393
- @href = a.attributes['href'].value
394
- @text = a.text.lstrip.rstrip
395
- break
396
- end
397
- end
398
- raise('invalid TableRow - no href') unless @href
399
- raise('invalid TableRow - no text') unless @text
400
-
401
- @doc.css('td.philologic_frequency_value').each do |value|
402
- @count = value.text.to_i
403
- break
404
- end
405
- raise('invalid TableRow - no count') unless @count
406
-
407
- end
408
-
409
- #
410
- # Returns value of attribute +key+ or +nil+.
411
- #
412
- def [](key)
413
- @doc.attributes.key?(key) ? @doc.attributes[key].value : nil
414
- end
415
-
416
- #
417
- # Returns Philologic::Link objects referenced by this Philologic::TableRow object.
418
- #
419
- def links
420
- q = @href.sub( %r{^\./\?}, '' ).split('&')
421
- @client.search( Hash[ *q.collect { |v| v.split('=', 2) }.flatten ] )
422
- end
423
-
424
- #
425
- # Return String representation of object.
426
- #
427
- def to_s
428
- [ @field, @count, @text ].join(' | ')
429
- end
430
-
431
- end # class Philologic::TableRow
432
-
433
248
  end # module Philologic
249
+