dataisland 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/dataisland.rb +117 -0
  2. metadata +66 -0
data/lib/dataisland.rb ADDED
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'dynarex'
4
+ require 'rexle'
5
+
6
# Renders HTML "data islands": for every <object type='text/xml'> found in a
# fetched HTML page, the referenced Dynarex document is loaded and its records
# are written into the table row that holds the elements bound to it through
# the datasrc / datafld attributes.
#
# The resulting document is available through #html_doc.
class DataIsland

  attr_reader :html_doc

  # Fetches the page, strips its <script> elements and renders every island.
  #
  # url - address of the HTML page,
  #       e.g. url = 'http://jamesrobertson.eu/index.html'
  #       The Dynarex file named by an object's data attribute is resolved
  #       relative to the directory part of this url.
  #
  def initialize(url)

    # 'User-Agent' is the HTTP header name; open-uri sends String keys
    # verbatim as request headers.
    useragent = {'User-Agent' => 'Dynarex dataisland to HTML converter'}

    # NOTE(review): fetching a URL through Kernel#open relies on open-uri
    # being loaded (presumably by dynarex -- confirm). Ruby >= 3.0 needs
    # URI.open here instead.
    html_buffer = open(url, useragent).read
    @html_doc = Rexle.new html_buffer

    @html_doc.xpath('//script').each(&:delete)

    @location_href = File.dirname(url)

    @html_doc.xpath("//object[@type='text/xml']").each do |x|

      h = x.attributes

      # an object without a data source or an id cannot be bound to anything
      next unless h[:data] && h[:id]

      dynarex = Dynarex.new "#{@location_href}/#{h[:data]}"

      records = descending?(h[:order]) ?
          dynarex.flat_records.reverse : dynarex.flat_records

      source_ref = '#' + h[:id]
      @html_doc.xpath("//*[@datasrc='#{source_ref}']").each do |island|
        render(records, h, island.element('//*[@datafld]'))
      end
    end
  end

  private

  # Returns true when the order attribute is exactly 'desc' or 'descending'.
  # A missing attribute (nil) counts as ascending.
  #
  def descending?(order)
    !(order.to_s =~ /\A(?:desc|descending)\z/).nil?
  end

  # Walks up from element and returns the nearest enclosing <tr>, which is
  # the node that gets cloned once per record.
  #
  # Returns nil when element is nil, when the top of the tree is reached, or
  # when <body> is reached before any <tr>.
  #
  def node_to_clone(element)

    return nil if element.nil?

    parent = element.parent
    return nil if parent.nil?

    case parent.name.downcase
    when 'body' then nil
    when 'tr'   then parent
    else node_to_clone(parent)
    end
  end

  # Picks the records to display and puts them in display order.
  #
  # h[:rows_per_page] - when present, only the first page of that many
  #                     records is kept (it overrides h[:range]).
  # h[:range]         - otherwise used to slice flat_records when present.
  #                     NOTE(review): the slice needs a Range or Integer; if
  #                     the attribute arrives as a String it has to be
  #                     converted by the caller -- confirm.
  # h[:sort_by]       - field name to sort on; a leading '-' reverses the
  #                     order. Sorting is applied after the slice.
  #
  # Returns an Array of records (all of flat_records when no option is set).
  #
  def select_records(flat_records, h)

    range = h[:range]

    if h[:rows_per_page]
      rpp = h[:rows_per_page].to_i
      # only the first page is rendered; rpp == 0 yields 0..-1, i.e. all rows
      range = 0..(rpp - 1)
    end

    # an out-of-bounds slice returns nil, which is treated as "no records"
    records = range ? (flat_records[range] || []) : flat_records

    sort_by = h[:sort_by]
    return records unless sort_by

    # records are looked up by Symbol when fields are filled in, so the sort
    # key is converted the same way
    key = sort_by.sub(/\A-/, '').to_sym
    sorted = records.sort_by { |record| record[key] }

    sort_by[/\A-/] ? sorted.reverse : sorted
  end

  # Clones the table row enclosing node once per record, fills the clone's
  # datafld elements from the record and appends it to the row's parent.
  # The original (template) row is removed afterwards.
  #
  # flat_records - records indexed by lowercase Symbol field names
  # h            - attributes of the <object>; see select_records
  # node         - an element carrying a datafld attribute, or nil
  #
  # Does nothing when node is nil or is not inside a <tr>.
  #
  def render(flat_records, h, node)

    rec_orig = node_to_clone(node)
    return unless rec_orig

    container = rec_orig.parent

    select_records(flat_records, h).each do |record|

      rec = rec_orig.deep_clone

      # every bound element is visited, so several elements may share a field
      rec.xpath('//*[@datafld]').each do |e|

        value = record[e.attribute(:datafld).downcase.to_sym]
        next if value.nil?

        # only <span> (text) and <a> (href) bindings are supported
        case e.name.downcase.to_sym
        when :span
          e.text = value
        when :a
          e.attributes['href'] = value
        end
      end

      container.add(rec)
    end

    rec_orig.delete
  end

end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataisland
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - James Robertson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-11-21 00:00:00 +00:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: dynarex
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: "0"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ description:
28
+ email:
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - lib/dataisland.rb
37
+ has_rdoc: true
38
+ homepage:
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options: []
43
+
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.5.2
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: dataisland
65
+ test_files: []
66
+