webminer 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/webminer.rb +9 -7
  2. data/lib/webminer/util.rb +2 -0
  3. metadata +35 -5
data/lib/webminer.rb CHANGED
@@ -5,17 +5,18 @@ require 'rss/1.0'
5
5
  require 'rss/2.0'
6
6
  require 'cgi'
7
7
  require 'iconv'
8
+ require 'ruby-debug'
9
+ require 'mongo_mapper'
8
10
 
9
11
  #Main class, just a place holder
10
12
 
11
13
  class WebMiner
12
14
 
13
15
  def self.create_story(item, topic)
14
-
15
16
  params=CGI::parse(item.link)
16
17
 
17
18
  link = params["url"][0]
18
-
19
+ puts Story.class
19
20
  if Story.where(:link => link).exists?
20
21
  return nil
21
22
  end
@@ -58,9 +59,10 @@ class WebMiner
58
59
 
59
60
  begin
60
61
  story.save
61
- puts "Saved "+story.title
62
- rescue SQLite3::ConstraintException,ActiveRecord::RecordNotUnique => ex
63
- puts "Skipped duplicated story: "+story.title
62
+ puts "Saved #{story.title}"
63
+ rescue ex
64
+ puts "Exception saving #{story.title}"
65
+ pp ex
64
66
  end
65
67
 
66
68
  #doc.css 'div[class="storyText"]' cbsnews.com
@@ -172,7 +174,7 @@ class WebMiner
172
174
  $options['From']='anon@anon.net'
173
175
  topics.keys.each do |topic|
174
176
  url = topics[topic]
175
- sleep 5+prng.rand*2
177
+ sleep 20+prng.rand*2
176
178
  topic_threads << Thread.new(url) do
177
179
  #uri = URI.parse(url)
178
180
 
@@ -215,7 +217,7 @@ class WebMiner
215
217
  print "Skipping\n"
216
218
  end
217
219
  hash = new_hash
218
- sleep 30+prng.rand*2
220
+ sleep 180+prng.rand*10
219
221
  end #while
220
222
  end #thread
221
223
  end #each
data/lib/webminer/util.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'gsl'
2
+
1
3
  class WebMiner::Util
2
4
 
3
5
  def self.strip_js(raw_text)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webminer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,10 +12,39 @@ cert_chain: []
12
12
  date: 2012-03-25 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name:
16
- - mongo
17
- - mongo_mapper
18
- - nokogiri
15
+ name: mongo
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: mongo_mapper
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
19
48
  requirement: !ruby/object:Gem::Requirement
20
49
  none: false
21
50
  requirements:
@@ -64,3 +93,4 @@ signing_key:
64
93
  specification_version: 3
65
94
  summary: I mine the web
66
95
  test_files: []
96
+ has_rdoc: