webminer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/webminer.rb +9 -7
- data/lib/webminer/util.rb +2 -0
- metadata +35 -5
data/lib/webminer.rb
CHANGED
@@ -5,17 +5,18 @@ require 'rss/1.0'
|
|
5
5
|
require 'rss/2.0'
|
6
6
|
require 'cgi'
|
7
7
|
require 'iconv'
|
8
|
+
require 'ruby-debug'
|
9
|
+
require 'mongo_mapper'
|
8
10
|
|
9
11
|
#Main class, just a place holder
|
10
12
|
|
11
13
|
class WebMiner
|
12
14
|
|
13
15
|
def self.create_story(item, topic)
|
14
|
-
|
15
16
|
params=CGI::parse(item.link)
|
16
17
|
|
17
18
|
link = params["url"][0]
|
18
|
-
|
19
|
+
puts Story.class
|
19
20
|
if Story.where(:link => link).exists?
|
20
21
|
return nil
|
21
22
|
end
|
@@ -58,9 +59,10 @@ class WebMiner
|
|
58
59
|
|
59
60
|
begin
|
60
61
|
story.save
|
61
|
-
puts "Saved
|
62
|
-
rescue
|
63
|
-
puts "
|
62
|
+
puts "Saved #{story.title}"
|
63
|
+
rescue ex
|
64
|
+
puts "Exception saving #{story.title}"
|
65
|
+
pp ex
|
64
66
|
end
|
65
67
|
|
66
68
|
#doc.css 'div[class="storyText"]' cbsnews.com
|
@@ -172,7 +174,7 @@ class WebMiner
|
|
172
174
|
$options['From']='anon@anon.net'
|
173
175
|
topics.keys.each do |topic|
|
174
176
|
url = topics[topic]
|
175
|
-
sleep
|
177
|
+
sleep 20+prng.rand*2
|
176
178
|
topic_threads << Thread.new(url) do
|
177
179
|
#uri = URI.parse(url)
|
178
180
|
|
@@ -215,7 +217,7 @@ class WebMiner
|
|
215
217
|
print "Skipping\n"
|
216
218
|
end
|
217
219
|
hash = new_hash
|
218
|
-
sleep
|
220
|
+
sleep 180+prng.rand*10
|
219
221
|
end #while
|
220
222
|
end #thread
|
221
223
|
end #each
|
data/lib/webminer/util.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webminer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,10 +12,39 @@ cert_chain: []
|
|
12
12
|
date: 2012-03-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
name: mongo
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: mongo_mapper
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: nokogiri
|
19
48
|
requirement: !ruby/object:Gem::Requirement
|
20
49
|
none: false
|
21
50
|
requirements:
|
@@ -64,3 +93,4 @@ signing_key:
|
|
64
93
|
specification_version: 3
|
65
94
|
summary: I mine the web
|
66
95
|
test_files: []
|
96
|
+
has_rdoc:
|