liveblog-indexer 0.2.0 → 0.2.1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/liveblog-indexer.rb +14 -9
- data.tar.gz.sig +0 -0
- metadata +1 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f82780521af1cc06c3f824c15200ce00745a99b7
|
|
4
|
+
data.tar.gz: 66819da8961da8a60b3e7441318d9f898274893f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dbea6b4324aaad28c221b24d57b9bb06801d74678dbd0953dc54d3602f39e115b74e12cb66e90c2a86ef769d2432313b533afc3fd19b6eda5f9fcec189e112ac
|
|
7
|
+
data.tar.gz: 0b9dbb25e21b8adf798a4bc766ae338821b2b4e79f7e44ad9c76e4311ae868233a407ceb55669c80ae90c688134e7d474fe63cf36b8d173266c5433c74e50bff
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/lib/liveblog-indexer.rb
CHANGED
|
@@ -19,7 +19,12 @@ class LiveBlogIndexer
|
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
@xws = XWS.new
|
|
22
|
-
|
|
22
|
+
|
|
23
|
+
@url_index = if urls_indexed and File.exists? urls_indexed then
|
|
24
|
+
JSON.parse(File.read(urls_indexed))
|
|
25
|
+
else
|
|
26
|
+
{}
|
|
27
|
+
end
|
|
23
28
|
|
|
24
29
|
end
|
|
25
30
|
|
|
@@ -40,11 +45,10 @@ class LiveBlogIndexer
|
|
|
40
45
|
|
|
41
46
|
url = "%s/#%s" % [link[/^https?:\/\/[^\/]+(.*)(?=\/$)/,1], \
|
|
42
47
|
section.attributes[:id]]
|
|
43
|
-
|
|
44
48
|
h = @xws.scan section.element('details')
|
|
45
49
|
|
|
46
50
|
h.each do |k, v|
|
|
47
|
-
|
|
51
|
+
|
|
48
52
|
word, count = k, v
|
|
49
53
|
|
|
50
54
|
keyword = @master[word]
|
|
@@ -61,6 +65,8 @@ class LiveBlogIndexer
|
|
|
61
65
|
end # /keyword
|
|
62
66
|
end # /h
|
|
63
67
|
end # /section
|
|
68
|
+
|
|
69
|
+
true
|
|
64
70
|
end # /add_index
|
|
65
71
|
|
|
66
72
|
def crawl(location)
|
|
@@ -81,14 +87,16 @@ class LiveBlogIndexer
|
|
|
81
87
|
private
|
|
82
88
|
|
|
83
89
|
def index_file(location)
|
|
84
|
-
|
|
90
|
+
|
|
91
|
+
return if @url_index.has_key? location
|
|
92
|
+
|
|
85
93
|
puts 'indexing : ' + location.inspect
|
|
86
94
|
doc = Rexle.new(RXFHelper.read(location).first)
|
|
87
95
|
summary = doc.root.element 'summary'
|
|
88
96
|
return unless summary
|
|
89
97
|
|
|
90
98
|
result = add_index doc
|
|
91
|
-
return
|
|
99
|
+
return unless result
|
|
92
100
|
|
|
93
101
|
prev_day = summary.text 'prev_day'
|
|
94
102
|
|
|
@@ -98,9 +106,6 @@ class LiveBlogIndexer
|
|
|
98
106
|
index_file(url)
|
|
99
107
|
end
|
|
100
108
|
end
|
|
101
|
-
|
|
102
|
-
def save_urlsindex(filepath)
|
|
103
|
-
|
|
104
|
-
end
|
|
109
|
+
|
|
105
110
|
|
|
106
111
|
end
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
|
Binary file
|