webminer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/webminer.rb +9 -7
- data/lib/webminer/util.rb +2 -0
- metadata +35 -5
data/lib/webminer.rb
CHANGED
@@ -5,17 +5,18 @@ require 'rss/1.0'
|
|
5
5
|
require 'rss/2.0'
|
6
6
|
require 'cgi'
|
7
7
|
require 'iconv'
|
8
|
+
require 'ruby-debug'
|
9
|
+
require 'mongo_mapper'
|
8
10
|
|
9
11
|
#Main class, just a place holder
|
10
12
|
|
11
13
|
class WebMiner
|
12
14
|
|
13
15
|
def self.create_story(item, topic)
|
14
|
-
|
15
16
|
params=CGI::parse(item.link)
|
16
17
|
|
17
18
|
link = params["url"][0]
|
18
|
-
|
19
|
+
puts Story.class
|
19
20
|
if Story.where(:link => link).exists?
|
20
21
|
return nil
|
21
22
|
end
|
@@ -58,9 +59,10 @@ class WebMiner
|
|
58
59
|
|
59
60
|
begin
|
60
61
|
story.save
|
61
|
-
puts "Saved
|
62
|
-
rescue
|
63
|
-
puts "
|
62
|
+
puts "Saved #{story.title}"
|
63
|
+
rescue ex
|
64
|
+
puts "Exception saving #{story.title}"
|
65
|
+
pp ex
|
64
66
|
end
|
65
67
|
|
66
68
|
#doc.css 'div[class="storyText"]' cbsnews.com
|
@@ -172,7 +174,7 @@ class WebMiner
|
|
172
174
|
$options['From']='anon@anon.net'
|
173
175
|
topics.keys.each do |topic|
|
174
176
|
url = topics[topic]
|
175
|
-
sleep
|
177
|
+
sleep 20+prng.rand*2
|
176
178
|
topic_threads << Thread.new(url) do
|
177
179
|
#uri = URI.parse(url)
|
178
180
|
|
@@ -215,7 +217,7 @@ class WebMiner
|
|
215
217
|
print "Skipping\n"
|
216
218
|
end
|
217
219
|
hash = new_hash
|
218
|
-
sleep
|
220
|
+
sleep 180+prng.rand*10
|
219
221
|
end #while
|
220
222
|
end #thread
|
221
223
|
end #each
|
data/lib/webminer/util.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webminer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,10 +12,39 @@ cert_chain: []
|
|
12
12
|
date: 2012-03-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
name: mongo
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: mongo_mapper
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: nokogiri
|
19
48
|
requirement: !ruby/object:Gem::Requirement
|
20
49
|
none: false
|
21
50
|
requirements:
|
@@ -64,3 +93,4 @@ signing_key:
|
|
64
93
|
specification_version: 3
|
65
94
|
summary: I mine the web
|
66
95
|
test_files: []
|
96
|
+
has_rdoc:
|