webscraper_framework 0.1.733 → 0.1.734

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6bca092d52da6670894bec3405031ed522bada0
4
- data.tar.gz: 935e3d48a5ae0e6e94bfbcea88de93459b6c1434
3
+ metadata.gz: f6c75efc4c8371716d412d938322a36b7f3c4ca6
4
+ data.tar.gz: 6f65b45cf58f197c7d6c46353fe48c4bd8ff15e1
5
5
  SHA512:
6
- metadata.gz: c958a058d566ae360c03dd9861ce00157f42cc6241cfe7ec27ebdf6f8a473674a82aadd75d4b244f1fcb654c9387b83ec30cf59818d20d36f60d7cb66f4fdf6c
7
- data.tar.gz: 21a547b4cf5d813bbfa859cb4007d1021386d639adc1a197a7e5d0368173557f74b292dedb5b86403210943c1ad43a79b7b8dceaf3c91e09a8d5caad2e0bac09
6
+ metadata.gz: 71639b2805835f624d9b3e4ba346b4feaf5e4178f3ccc0e2543188e2e7eca6b8cd7393db5cbe3bbfb6fc35626a9a7b17018786bdd8a18f619c3cb8205af17466
7
+ data.tar.gz: 8ce6f7bee7cf53a6f9aed06ff70f509f55724bb3638cf0f782915b87a2b903579044bf5c87035d579a30eab2a9bc064a793ffb97784fbf5f7146d17422310d3e
data/bin/wsfr CHANGED
@@ -3,5 +3,4 @@
3
3
  require "bundler/setup"
4
4
  require "webscraper_framework"
5
5
 
6
-
7
6
  WebscraperFramework::CLI.new(ARGV).start
@@ -14,7 +14,6 @@ require "sinatra/base"
14
14
  module WebscraperFramework
15
15
  class Application < Sinatra::Base
16
16
 
17
-
18
17
  def render_file(file, params= {})
19
18
  gemdir = Gem.loaded_specs["webscraper_framework"].gem_dir
20
19
  base_template = File.read(gemdir + "/views/base.html.haml")
@@ -22,23 +21,10 @@ module WebscraperFramework
22
21
  return Haml::Engine.new(base_template).render(Object.new, content: content, models: @@models, scrapers: @@scrapers)
23
22
  end
24
23
 
25
-
26
-
27
24
  model_files = Dir["./models/*"]
28
25
  helper_files = Dir["./helpers/*"]
29
26
  scraper_files = Dir["./scrapers/*"]
30
27
 
31
-
32
- puts "-"
33
- puts Dir["./"]
34
- puts model_files
35
- puts helper_files
36
- puts scraper_files
37
- puts "-"
38
-
39
-
40
-
41
-
42
28
  data = {}
43
29
 
44
30
  @@scrapers = []
@@ -98,7 +84,7 @@ module WebscraperFramework
98
84
  csv << [id] + m.fields.map {|a| obj[a]}
99
85
  end
100
86
  end
101
- # render_file("model", { })
87
+ # render_file("", { })
102
88
  end
103
89
 
104
90
  get '/models/:name' do
@@ -106,7 +92,7 @@ module WebscraperFramework
106
92
  end
107
93
 
108
94
 
109
- puts @@models
95
+ puts "Loaded Models #{@@models}"
110
96
 
111
97
  end
112
98
  end
@@ -1,7 +1,4 @@
1
1
  module WebscraperFramework
2
2
  class Base
3
- def self.awesome?
4
- puts "Hi"
5
- end
6
3
  end
7
4
  end
@@ -42,10 +42,13 @@ module WebscraperFramework
42
42
  end
43
43
 
44
44
  def self.all
45
- puts "#{self.name.underscore}.yml"
46
- puts YAML::load_file("#{self.name}.yml")
47
- puts YAML::load_file("#{self.name}.yml")[:collecion]
48
- YAML::load_file("#{self.name}.yml")[:collection]
45
+ begin
46
+ YAML::load_file("#{self.name}.yml")
47
+ YAML::load_file("#{self.name}.yml")[:collecion]
48
+ YAML::load_file("#{self.name}.yml")[:collection]
49
+ rescue
50
+ []
51
+ end
49
52
  end
50
53
 
51
54
  def fields
@@ -60,7 +63,7 @@ module WebscraperFramework
60
63
  collection = {} unless collection
61
64
  collection[:collection] = {} unless collection[:collection]
62
65
  collection[:collection][id.to_sym] = to_hash
63
- File.open(filename, 'w') {|f| f.write collection.to_yaml } #Store
66
+ File.open(filename, 'w') {|f| f.write collection.to_yaml }
64
67
  end
65
68
  end
66
69
 
@@ -1,43 +1,73 @@
1
1
  require 'nokogiri'
2
+
2
3
  module WebscraperFramework
3
4
 
4
5
  class Page
5
6
 
6
7
  attr_accessor :html
7
8
 
9
+ # TODO: the try_* lookup helpers below need a clearer flow.
10
+
11
+ def try_css_attr(css, attr)
12
+ if element = self.try_css(css)
13
+ element.attr(attr)
14
+ else
15
+ nil
16
+ end
17
+ end
18
+
19
+ def try_css_parent_attr(css, attr)
20
+ if element = try_css(css)
21
+ element.parent.attr(attr)
22
+ else
23
+ nil
24
+ end
25
+ end
26
+
27
+ def try_css(css)
28
+ self.html.css(css).first
29
+ end
30
+
31
+
32
+ # Fetches the contents of a web page for the given URL.
33
+ # from_cache = true (default) will take a cached version
34
+ # if one exists; otherwise the page is downloaded and written to the cache.
35
+
8
36
  def self.get_page(url, from_cache = true)
9
37
  url_hash = Digest::SHA256.hexdigest(url)
10
38
  filename = "cache/#{url_hash}"
11
39
  if from_cache && File.file?(filename)
12
- result = open(filename)
40
+ result = open(filename).read
13
41
  puts "Gotten #{filename} from cache"
14
42
  else
15
- result = open(url)
16
- File.write(filename, result.read)
43
+ result = open(url).read
44
+ File.write(filename, result)
17
45
  puts "Written cache file #{filename}"
18
46
  end
19
- return result.read
47
+ return result
20
48
  end
21
49
 
22
50
  def initialize html: nil
23
51
  self.html = html
24
52
  end
25
53
 
26
- # helper for seamless initialisation no matter what starting point
54
+ # Helper constructors: build a Page from a URL, an HTML string, or already-parsed HTML.
55
+
27
56
  def self.by_url(url)
28
57
  self.new(html: Nokogiri::HTML(get_page(url)))
29
58
  end
30
59
 
31
- # helper for seamless initialisation no matter what starting point
32
60
  def self.by_html_string(html_string)
33
61
  self.new(html: Nokogiri::HTML(html_string))
34
62
  end
35
63
 
36
- # helper for seamless initialisation no matter what starting point
37
64
  def self.by_html(html)
38
65
  self.new(html: html)
39
66
  end
40
67
 
68
+
69
+ # Returns a list of Page objects, one per element matching the selector.
70
+ # Use it to gather a set of matching elements from a page.
41
71
  def collection_by_selector(selector)
42
72
  self.html.css(selector).map{|item| WebscraperFramework::Page.by_html(item)}
43
73
  end
@@ -1,10 +1,7 @@
1
-
2
-
3
1
  module WebscraperFramework
4
2
 
5
3
  end
6
4
 
7
-
8
5
  require "webscraper_framework/page"
9
6
  require "webscraper_framework/base_model"
10
7
  require "webscraper_framework/application"
data/views/home.html.haml CHANGED
@@ -1 +1 @@
1
- %p BODY
1
+ %p
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscraper_framework
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.733
4
+ version: 0.1.734
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene van Pelt