webminer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/webminer.rb +9 -7
  2. data/lib/webminer/util.rb +2 -0
  3. metadata +35 -5
data/lib/webminer.rb CHANGED
@@ -5,17 +5,18 @@ require 'rss/1.0'
5
5
  require 'rss/2.0'
6
6
  require 'cgi'
7
7
  require 'iconv'
8
+ require 'ruby-debug'
9
+ require 'mongo_mapper'
8
10
 
9
11
  #Main class, just a place holder
10
12
 
11
13
  class WebMiner
12
14
 
13
15
  def self.create_story(item, topic)
14
-
15
16
  params=CGI::parse(item.link)
16
17
 
17
18
  link = params["url"][0]
18
-
19
+ puts Story.class
19
20
  if Story.where(:link => link).exists?
20
21
  return nil
21
22
  end
@@ -58,9 +59,10 @@ class WebMiner
58
59
 
59
60
  begin
60
61
  story.save
61
- puts "Saved "+story.title
62
- rescue SQLite3::ConstraintException,ActiveRecord::RecordNotUnique => ex
63
- puts "Skipped duplicated story: "+story.title
62
+ puts "Saved #{story.title}"
63
+ rescue ex
64
+ puts "Exception saving #{story.title}"
65
+ pp ex
64
66
  end
65
67
 
66
68
  #doc.css 'div[class="storyText"]' cbsnews.com
@@ -172,7 +174,7 @@ class WebMiner
172
174
  $options['From']='anon@anon.net'
173
175
  topics.keys.each do |topic|
174
176
  url = topics[topic]
175
- sleep 5+prng.rand*2
177
+ sleep 20+prng.rand*2
176
178
  topic_threads << Thread.new(url) do
177
179
  #uri = URI.parse(url)
178
180
 
@@ -215,7 +217,7 @@ class WebMiner
215
217
  print "Skipping\n"
216
218
  end
217
219
  hash = new_hash
218
- sleep 30+prng.rand*2
220
+ sleep 180+prng.rand*10
219
221
  end #while
220
222
  end #thread
221
223
  end #each
data/lib/webminer/util.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require 'gsl'
2
+
1
3
  class WebMiner::Util
2
4
 
3
5
  def self.strip_js(raw_text)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webminer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,10 +12,39 @@ cert_chain: []
12
12
  date: 2012-03-25 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name:
16
- - mongo
17
- - mongo_mapper
18
- - nokogiri
15
+ name: mongo
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: mongo_mapper
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
19
48
  requirement: !ruby/object:Gem::Requirement
20
49
  none: false
21
50
  requirements:
@@ -64,3 +93,4 @@ signing_key:
64
93
  specification_version: 3
65
94
  summary: I mine the web
66
95
  test_files: []
96
+ has_rdoc: