dataisland 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dataisland.rb +117 -0
- metadata +66 -0
data/lib/dataisland.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'open-uri'

require 'dynarex'
require 'rexle'
|
5
|
+
|
6
|
+
# Fills the "data islands" of an HTML page with records.
#
# The page is fetched and parsed with Rexle. For every
# <object type="text/xml"> element, the document named by its +data+
# attribute is loaded with Dynarex, and every element whose +datasrc+
# attribute is "#<object id>" has its template table row repeated once per
# record. The resulting document is available through #html_doc.
class DataIsland
  # Value sent in the User-Agent request header when the page is fetched.
  USER_AGENT = 'Dynarex dataisland to HTML converter'.freeze

  attr_reader :html_doc

  # Fetches and parses the page at +url+, removes its <script> elements and
  # renders every data island found in it.
  #
  # e.g. url = 'http://jamesrobertson.eu/index.html'
  #
  def initialize(url)
    @html_doc = Rexle.new(read_source(url))
    @html_doc.xpath('//script').each(&:delete)

    # The +data+ attribute of each island is resolved against this directory.
    @location_href = File.dirname(url)

    @html_doc.xpath("//object[@type='text/xml']").each do |x|
      h = x.attributes

      # Without a data source or an id there is nothing to load or bind.
      next unless h[:data] && h[:id]

      dynarex = Dynarex.new("#{@location_href}/#{h[:data]}")
      records = dynarex.flat_records
      records = records.reverse if descending?(h[:order])

      @html_doc.xpath("//*[@datasrc='\##{h[:id]}']").each do |island|
        render(records, h, island.element('//*[@datafld]'))
      end
    end
  end

  private

  # Returns the text read from +url+.
  #
  # URI.open is used when open-uri provides it, because Kernel#open no longer
  # handles URLs on Ruby 3.0+; older rubies fall back to Kernel#open. The
  # block form closes the handle once it has been read.
  def read_source(url)
    headers = { 'User-Agent' => USER_AGENT }

    if defined?(URI) && URI.respond_to?(:open)
      URI.open(url, headers, &:read)
    else
      open(url, headers, &:read)
    end
  end

  # True when +order+ is exactly "desc" or "descending".
  def descending?(order)
    !!(order.to_s =~ /\Adesc(?:ending)?\z/)
  end

  # Walks up from +element+ and returns the nearest ancestor named "tr"
  # (compared case-insensitively). Returns nil when "body" is reached first,
  # when the ancestors run out, or when +element+ is nil.
  def node_to_clone(element)
    node = element

    while node && (node = node.parent)
      case node.name.downcase
      when 'body' then return nil
      when 'tr' then return node
      end
    end

    nil
  end

  # Repeats the template row that contains +node+ once per selected record,
  # appending each filled copy to the row's parent and then deleting the
  # template row itself. Does nothing when no template row is found.
  #
  # flat_records - Array of records, read with Symbol keys.
  # h            - attributes of the island's <object>; :range,
  #                :rows_per_page and :sort_by are consulted.
  # node         - an element carrying a datafld attribute, or nil.
  def render(flat_records, h, node)
    rec_orig = node_to_clone(node)
    return unless rec_orig

    select_records(flat_records, h).each do |record|
      rec = rec_orig.deep_clone
      populate(rec, record)
      rec_orig.parent.add(rec)
    end

    rec_orig.delete
  end

  # Returns the records to display: the slice chosen by :rows_per_page
  # (which takes precedence) or :range, or every record when neither is
  # given. The slice is taken first and :sort_by is applied to it afterwards.
  def select_records(flat_records, h)
    range = to_range(h[:range])

    if h[:rows_per_page]
      rpp = h[:rows_per_page].to_i
      # Only the first page is rendered.
      range = 0..(rpp - 1)
    end

    # Array#[] returns nil when the range starts past the last record.
    records = range ? (flat_records[range] || []) : flat_records
    sort_records(records, h[:sort_by])
  end

  # Converts a "first..last" / "first...last" string into a Range so that it
  # can be used to slice an Array. Any other value is returned unchanged.
  #
  # NOTE(review): assumes the range attribute uses Ruby range notation -
  # confirm against the pages that use it.
  def to_range(value)
    return value if value.nil? || value.is_a?(Range)

    m = /\A\s*(-?\d+)\s*(\.{2,3})\s*(-?\d+)\s*\z/.match(value.to_s)
    return value unless m

    Range.new(m[1].to_i, m[3].to_i, m[2] == '...')
  end

  # Sorts +records+ by the field named in +sort_by+; a leading "-" reverses
  # the order. Returns +records+ untouched when +sort_by+ is nil.
  def sort_records(records, sort_by)
    return records unless sort_by

    name = sort_by.to_s
    reversed = name.start_with?('-')
    name = name[1..-1] if reversed
    key = name.to_sym

    # Fields are read with Symbol keys when rows are filled, so try the
    # Symbol first and fall back to the attribute's String form.
    sorted = records.sort_by do |record|
      record.key?(key) ? record[key] : record[name]
    end

    reversed ? sorted.reverse : sorted
  end

  # Copies the values of +record+ into the elements of +rec+ that carry a
  # datafld attribute: <span> receives the value as text, <a> receives it as
  # href. Every bound element is filled, including several bound to the same
  # field. Fields missing from the record are skipped.
  def populate(rec, record)
    rec.xpath('//*[@datafld]').each do |e|
      value = record[e.attribute(:datafld).downcase.to_sym]
      next if value.nil?

      case e.name.downcase
      when 'span' then e.text = value
      when 'a' then e.attributes['href'] = value
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dataisland
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- James Robertson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-11-21 00:00:00 +00:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: dynarex
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
description:
|
28
|
+
email:
|
29
|
+
executables: []
|
30
|
+
|
31
|
+
extensions: []
|
32
|
+
|
33
|
+
extra_rdoc_files: []
|
34
|
+
|
35
|
+
files:
|
36
|
+
- lib/dataisland.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage:
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
none: false
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: "0"
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.5.2
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: dataisland
|
65
|
+
test_files: []
|
66
|
+
|