dataisland 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. data/lib/dataisland.rb +117 -0
  2. metadata +66 -0
data/lib/dataisland.rb ADDED
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+
3
require 'dynarex'
require 'open-uri'
require 'rexle'
5
+
6
# Fetches an HTML page, loads every XML data island it declares through
# <object type="text/xml" data="..." id="..."> and expands the table rows
# bound to each island (datasrc="#id") into one row per record.
#
# The converted document is available through #html_doc.
class DataIsland

  attr_reader :html_doc

  # Downloads and converts the page.
  #
  # url - location of the HTML page,
  #       e.g. url = 'http://jamesrobertson.eu/index.html'
  #
  # Data island files are resolved relative to the directory of +url+.
  def initialize(url)

    @html_doc = Rexle.new fetch_html(url)

    # script elements are removed from the converted document
    @html_doc.xpath('//script').each(&:delete)

    @location_href = File.dirname(url)

    @html_doc.xpath("//object[@type='text/xml']").each do |x|

      h = x.attributes
      # an island without a data file or an id cannot be bound to anything
      next unless h[:data] && h[:id]

      dynarex = Dynarex.new "#{@location_href}/#{h[:data]}"

      records = dynarex.flat_records
      records = records.reverse if descending?(h[:order])

      island_ref = '#' + h[:id]
      @html_doc.xpath("//*[@datasrc='#{island_ref}']").each do |island|
        render(records, h, island.element('//*[@datafld]'))
      end
    end
  end

  private

  # Reads the page body, identifying this converter through the
  # User-Agent request header.
  def fetch_html(url)

    headers = {'User-Agent' => 'Dynarex dataisland to HTML converter'}
    reader = lambda {|io| io.read }

    # URI.open is public only on Rubies that ship it; elsewhere the
    # open-uri patched Kernel#open is the way to read a URL.
    if URI.respond_to?(:open) then
      URI.open(url, headers, &reader)
    else
      open(url, headers, &reader)
    end
  end

  # True when the island's order attribute asks for reversed records.
  def descending?(order)
    %w(desc descending).include?(order.to_s)
  end

  # Walks up from +element+ and returns the closest enclosing <tr>, which is
  # the node repeated for every record. Returns nil when +element+ is nil,
  # when <body> is reached first, or when the ancestors run out.
  def node_to_clone(element)

    parent = element ? element.parent : nil

    while parent
      case parent.name.downcase
      when 'body'
        return nil
      when 'tr'
        return parent
      end
      parent = parent.parent
    end

    nil
  end

  # Replaces the template row that contains +node+ with one populated copy
  # per record.
  #
  # flat_records - Array of records; field values are looked up by
  #                downcased symbol (record[:title])
  # h            - attributes of the island's <object> element; reads
  #                :sort_by, :range and :rows_per_page
  # node         - an element carrying a datafld attribute inside the
  #                template row (may be nil, in which case nothing happens)
  def render(flat_records, h, node)

    rec_orig = node_to_clone(node)
    return unless rec_orig

    # NOTE(review): :range is handed to Array#[] untouched, so it has to be
    # something Array#[] accepts (e.g. a Range) - confirm the intended
    # attribute format.
    range = h[:range]

    if h[:rows_per_page] then
      pg = 1 # only the first page is ever rendered
      rpp = h[:rows_per_page].to_i
      first = (pg - 1) * rpp
      range = Range.new(first, first + rpp - 1)
    end

    # without a range every record is rendered
    records = range ? flat_records[range] : flat_records
    return unless records

    sort_by = h[:sort_by]

    if sort_by then
      # a leading '-' requests descending order
      reversed = !sort_by[/^-/].nil?
      key = reversed ? sort_by[1..-1] : sort_by
      # NOTE(review): the sort key is used exactly as given, whereas field
      # values below are looked up by symbol - verify the record key type.
      records = records.sort_by {|record| record[key] }
      records = records.reverse if reversed
    end

    records.each do |record|

      rec = rec_orig.deep_clone

      # fill every bound element of the copy, including several elements
      # bound to the same field
      rec.xpath('//*[@datafld]').each do |e|

        value = record[e.attribute(:datafld).downcase.to_sym]
        next if value.nil?

        case e.name.downcase.to_sym
        when :span
          e.text = value
        when :a
          e.attributes['href'] = value
        end
      end

      rec_orig.parent.add(rec)
    end

    # the template row itself must not appear in the output
    rec_orig.delete
  end

end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataisland
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-11-21 00:00:00 +00:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: dynarex
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ description:
28
+ email:
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - lib/dataisland.rb
37
+ has_rdoc: true
38
+ homepage:
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options: []
43
+
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.5.2
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: dataisland
65
+ test_files: []
66
+