dataisland 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dataisland.rb +117 -0
- metadata +66 -0
data/lib/dataisland.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'open-uri'

require 'dynarex'
require 'rexle'
|
5
|
+
|
6
|
+
# Expands XML "data islands" embedded in an HTML page.
#
# The page at +url+ is fetched and parsed with Rexle, and its <script>
# elements are removed. Every <object type="text/xml"> element is then
# treated as a data source: its +data+ attribute (resolved relative to the
# directory of +url+) is loaded with Dynarex, and every element whose
# +datasrc+ attribute equals "#<object id>" is bound to those records.
# Binding means: the <tr> enclosing the first descendant carrying a
# +datafld+ attribute is used as a template, cloned once per record, filled
# in, and finally removed from the document.
#
# Attributes read from the <object> element:
#   data           - location of the Dynarex document (required)
#   id             - name referenced by datasrc="#id" (required)
#   order          - "desc" or "descending" reverses the record order
#   range          - slice of records to show, e.g. "0..4"
#   rows_per_page  - page size; only the first page is rendered
#   sort_by        - field to sort on; a leading "-" sorts descending
class DataIsland

  # The Rexle document of the page, with all data islands expanded.
  attr_reader :html_doc

  # url - address of the HTML page,
  #       e.g. url = 'http://jamesrobertson.eu/index.html'
  #
  def initialize(url)

    @html_doc = Rexle.new(fetch(url))
    @html_doc.xpath('//script').each(&:delete)
    @location_href = File.dirname(url)

    @html_doc.xpath("//object[@type='text/xml']").each do |object|

      attrs = object.attributes
      # An island without a data source or an id cannot be bound to anything.
      next unless attrs[:data] && attrs[:id]

      dynarex = Dynarex.new("#{@location_href}/#{attrs[:data]}")
      records = dynarex.flat_records
      records = records.reverse if descending?(attrs[:order])

      xpath = "//*[@datasrc='#%s']" % attrs[:id]
      @html_doc.xpath(xpath).each do |island|
        render(records, attrs, island.element('//*[@datafld]'))
      end
    end
  end

  private

  # Downloads +url+ and returns its body as a String. The IO is closed as
  # soon as it has been read.
  def fetch(url)

    # open-uri sends String keys as HTTP header fields, so the key has to be
    # the real field name 'User-Agent'.
    headers = {'User-Agent' => 'Dynarex dataisland to HTML converter'}
    reader = lambda {|io| io.read }

    # URI.open is the supported entry point on current Rubies; older ones
    # only offer open-uri's Kernel#open.
    if URI.respond_to?(:open)
      URI.open(url, headers, &reader)
    else
      open(url, headers, &reader)
    end
  end

  # True when the +order+ attribute is exactly "desc" or "descending".
  def descending?(order)
    !(order.to_s =~ /\Adesc(?:ending)?\z/).nil?
  end

  # Walks up from +element+ and returns the nearest enclosing <tr> element,
  # which serves as the per-record template. Returns nil when +element+ is
  # nil, when <body> is reached first, or when the top of the tree is reached
  # without meeting either.
  def node_to_clone(element)

    node = element && element.parent

    while node.respond_to?(:name)
      case node.name.to_s.downcase
      when 'tr'
        return node
      when 'body'
        return nil
      end
      node = node.parent
    end

    nil
  end

  # Clones the template row of +node+ once per record, fills each clone from
  # the record and appends it to the template's parent; the template itself
  # is removed afterwards. Does nothing when +node+ has no enclosing <tr>.
  #
  # flat_records - Array of records; fields are read with Symbol keys
  # h            - attributes of the <object> element (see class comment)
  # node         - an element carrying a datafld attribute, or nil
  def render(flat_records, h, node)

    template = node_to_clone(node)
    return unless template

    records = sort_records(select_records(flat_records, h), h[:sort_by])
    container = template.parent

    records.each do |record|
      row = template.deep_clone
      row.xpath('//*[@datafld]').each {|target| fill(target, record) }
      container.add(row)
    end

    template.delete
  end

  # Returns the records to display: the first page when rows_per_page is
  # given, otherwise the slice named by the range attribute, otherwise all
  # of them. A slice that lies outside the list yields an empty Array.
  def select_records(flat_records, h)

    if h[:rows_per_page]
      page = 1  # paging is not wired up yet; always the first page
      per_page = h[:rows_per_page].to_i
      first = (page - 1) * per_page
      range = Range.new(first, first + per_page - 1)
    else
      range = parse_range(h[:range])
    end

    return flat_records unless range
    flat_records[range] || []
  end

  # Converts the range attribute into a Range. Accepts nil, a Range, or text
  # of the form "a..b" / "a...b".
  # NOTE(review): the textual format is an assumption, confirm against the
  # pages that actually use the range attribute.
  def parse_range(value)

    return value if value.nil? || value.is_a?(Range)

    match = /\A\s*(-?\d+)\s*(\.{2,3})\s*(-?\d+)\s*\z/.match(value.to_s)
    raise ArgumentError, "unsupported range attribute: #{value.inspect}" unless match

    Range.new(match[1].to_i, match[3].to_i, match[2] == '...')
  end

  # Sorts +records+ by the field named in +sort_by+; a leading "-" requests
  # descending order. Returns +records+ untouched when +sort_by+ is nil.
  def sort_records(records, sort_by)

    return records unless sort_by

    reversed = !sort_by[/^-/].nil?
    key = reversed ? sort_by[1..-1] : sort_by
    sorted = records.sort_by {|record| field_value(record, key) }

    reversed ? sorted.reverse : sorted
  end

  # Reads a field by Symbol key (the way fields are read when filling a row),
  # falling back to the String key.
  def field_value(record, key)
    value = record[key.to_sym]
    value.nil? ? record[key] : value
  end

  # Copies the record field named by the element's datafld attribute into
  # the element: the text of a <span>, the href of an <a>. Other elements
  # and fields missing from the record are left alone.
  def fill(target, record)

    value = record[target.attribute(:datafld).downcase.to_sym]
    return if value.nil?

    case target.name.downcase.to_sym
    when :span
      target.text = value
    when :a
      target.attributes['href'] = value
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dataisland
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-11-21 00:00:00 +00:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: dynarex
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
description:
|
28
|
+
email:
|
29
|
+
executables: []
|
30
|
+
|
31
|
+
extensions: []
|
32
|
+
|
33
|
+
extra_rdoc_files: []
|
34
|
+
|
35
|
+
files:
|
36
|
+
- lib/dataisland.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage:
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.5.2
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: dataisland
|
65
|
+
test_files: []
|
66
|
+
|