liveblog-indexer 0.2.0 → 0.2.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 711ba6e2a290dfc67dfc1703ffcddf417c7c90d3
4
- data.tar.gz: 5525552551eee1953370b99034d2a712bb308ac2
3
+ metadata.gz: f82780521af1cc06c3f824c15200ce00745a99b7
4
+ data.tar.gz: 66819da8961da8a60b3e7441318d9f898274893f
5
5
  SHA512:
6
- metadata.gz: 8648c96fdf3f0bf7c0b1ccc72f99bef2759db323a159e4361fc479f7d0d5847b2ad0f04361533e161f1a0cd6eb5b85544823f0159cd270adb29e6ac00cce10ed
7
- data.tar.gz: 075d272a0af388c4addbcfb141ac0de8143d60b5b7ada4575d93a9ba84cc8e5b24b3c08490171196708012dbbed65dd5f2a588bdde538290f20ba121f79ced1a
6
+ metadata.gz: dbea6b4324aaad28c221b24d57b9bb06801d74678dbd0953dc54d3602f39e115b74e12cb66e90c2a86ef769d2432313b533afc3fd19b6eda5f9fcec189e112ac
7
+ data.tar.gz: 0b9dbb25e21b8adf798a4bc766ae338821b2b4e79f7e44ad9c76e4311ae868233a407ceb55669c80ae90c688134e7d474fe63cf36b8d173266c5433c74e50bff
checksums.yaml.gz.sig CHANGED
Binary file
@@ -19,7 +19,12 @@ class LiveBlogIndexer
19
19
  end
20
20
 
21
21
  @xws = XWS.new
22
- @url_index = {}
22
+
23
+ @url_index = if urls_indexed and File.exists? urls_indexed then
24
+ JSON.parse(File.read(urls_indexed))
25
+ else
26
+ {}
27
+ end
23
28
 
24
29
  end
25
30
 
@@ -40,11 +45,10 @@ class LiveBlogIndexer
40
45
 
41
46
  url = "%s/#%s" % [link[/^https?:\/\/[^\/]+(.*)(?=\/$)/,1], \
42
47
  section.attributes[:id]]
43
-
44
48
  h = @xws.scan section.element('details')
45
49
 
46
50
  h.each do |k, v|
47
-
51
+
48
52
  word, count = k, v
49
53
 
50
54
  keyword = @master[word]
@@ -61,6 +65,8 @@ class LiveBlogIndexer
61
65
  end # /keyword
62
66
  end # /h
63
67
  end # /section
68
+
69
+ true
64
70
  end # /add_index
65
71
 
66
72
  def crawl(location)
@@ -81,14 +87,16 @@ class LiveBlogIndexer
81
87
  private
82
88
 
83
89
  def index_file(location)
84
-
90
+
91
+ return if @url_index.has_key? location
92
+
85
93
  puts 'indexing : ' + location.inspect
86
94
  doc = Rexle.new(RXFHelper.read(location).first)
87
95
  summary = doc.root.element 'summary'
88
96
  return unless summary
89
97
 
90
98
  result = add_index doc
91
- return unless result
99
+ return unless result
92
100
 
93
101
  prev_day = summary.text 'prev_day'
94
102
 
@@ -98,9 +106,6 @@ class LiveBlogIndexer
98
106
  index_file(url)
99
107
  end
100
108
  end
101
-
102
- def save_urlsindex(filepath)
103
-
104
- end
109
+
105
110
 
106
111
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: liveblog-indexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file