webscraper_framework 0.1.733 → 0.1.734

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6bca092d52da6670894bec3405031ed522bada0
4
- data.tar.gz: 935e3d48a5ae0e6e94bfbcea88de93459b6c1434
3
+ metadata.gz: f6c75efc4c8371716d412d938322a36b7f3c4ca6
4
+ data.tar.gz: 6f65b45cf58f197c7d6c46353fe48c4bd8ff15e1
5
5
  SHA512:
6
- metadata.gz: c958a058d566ae360c03dd9861ce00157f42cc6241cfe7ec27ebdf6f8a473674a82aadd75d4b244f1fcb654c9387b83ec30cf59818d20d36f60d7cb66f4fdf6c
7
- data.tar.gz: 21a547b4cf5d813bbfa859cb4007d1021386d639adc1a197a7e5d0368173557f74b292dedb5b86403210943c1ad43a79b7b8dceaf3c91e09a8d5caad2e0bac09
6
+ metadata.gz: 71639b2805835f624d9b3e4ba346b4feaf5e4178f3ccc0e2543188e2e7eca6b8cd7393db5cbe3bbfb6fc35626a9a7b17018786bdd8a18f619c3cb8205af17466
7
+ data.tar.gz: 8ce6f7bee7cf53a6f9aed06ff70f509f55724bb3638cf0f782915b87a2b903579044bf5c87035d579a30eab2a9bc064a793ffb97784fbf5f7146d17422310d3e
data/bin/wsfr CHANGED
@@ -3,5 +3,4 @@
3
3
  require "bundler/setup"
4
4
  require "webscraper_framework"
5
5
 
6
-
7
6
  WebscraperFramework::CLI.new(ARGV).start
@@ -14,7 +14,6 @@ require "sinatra/base"
14
14
  module WebscraperFramework
15
15
  class Application < Sinatra::Base
16
16
 
17
-
18
17
  def render_file(file, params= {})
19
18
  gemdir = Gem.loaded_specs["webscraper_framework"].gem_dir
20
19
  base_template = File.read(gemdir + "/views/base.html.haml")
@@ -22,23 +21,10 @@ module WebscraperFramework
22
21
  return Haml::Engine.new(base_template).render(Object.new, content: content, models: @@models, scrapers: @@scrapers)
23
22
  end
24
23
 
25
-
26
-
27
24
  model_files = Dir["./models/*"]
28
25
  helper_files = Dir["./helpers/*"]
29
26
  scraper_files = Dir["./scrapers/*"]
30
27
 
31
-
32
- puts "-"
33
- puts Dir["./"]
34
- puts model_files
35
- puts helper_files
36
- puts scraper_files
37
- puts "-"
38
-
39
-
40
-
41
-
42
28
  data = {}
43
29
 
44
30
  @@scrapers = []
@@ -98,7 +84,7 @@ module WebscraperFramework
98
84
  csv << [id] + m.fields.map {|a| obj[a]}
99
85
  end
100
86
  end
101
- # render_file("model", { })
87
+ # render_file("", { })
102
88
  end
103
89
 
104
90
  get '/models/:name' do
@@ -106,7 +92,7 @@ module WebscraperFramework
106
92
  end
107
93
 
108
94
 
109
- puts @@models
95
+ puts "Loaded Models #{@@models}"
110
96
 
111
97
  end
112
98
  end
@@ -1,7 +1,4 @@
1
1
  module WebscraperFramework
2
2
  class Base
3
- def self.awesome?
4
- puts "Hi"
5
- end
6
3
  end
7
4
  end
@@ -42,10 +42,13 @@ module WebscraperFramework
42
42
  end
43
43
 
44
44
  def self.all
45
- puts "#{self.name.underscore}.yml"
46
- puts YAML::load_file("#{self.name}.yml")
47
- puts YAML::load_file("#{self.name}.yml")[:collecion]
48
- YAML::load_file("#{self.name}.yml")[:collection]
45
+ begin
46
+ YAML::load_file("#{self.name}.yml")
47
+ YAML::load_file("#{self.name}.yml")[:collecion]
48
+ YAML::load_file("#{self.name}.yml")[:collection]
49
+ rescue
50
+ []
51
+ end
49
52
  end
50
53
 
51
54
  def fields
@@ -60,7 +63,7 @@ module WebscraperFramework
60
63
  collection = {} unless collection
61
64
  collection[:collection] = {} unless collection[:collection]
62
65
  collection[:collection][id.to_sym] = to_hash
63
- File.open(filename, 'w') {|f| f.write collection.to_yaml } #Store
66
+ File.open(filename, 'w') {|f| f.write collection.to_yaml }
64
67
  end
65
68
  end
66
69
 
@@ -1,43 +1,73 @@
1
1
  require 'nokogiri'
2
+
2
3
  module WebscraperFramework
3
4
 
4
5
  class Page
5
6
 
6
7
  attr_accessor :html
7
8
 
9
+ # Need better flow for what this does.
10
+
11
+ def try_css_attr(css, attr)
12
+ if element = self.try_css(css)
13
+ element.attr(attr)
14
+ else
15
+ nil
16
+ end
17
+ end
18
+
19
+ def try_css_parent_attr(css, attr)
20
+ if element = try_css(css)
21
+ element.parent.attr(attr)
22
+ else
23
+ nil
24
+ end
25
+ end
26
+
27
+ def try_css(css)
28
+ self.html.css(css).first
29
+ end
30
+
31
+
32
+ # Simply gets a webpage based on a url.
33
+ # from_cache = true (default) will take a cached version
34
+ # if it exists.
35
+
8
36
  def self.get_page(url, from_cache = true)
9
37
  url_hash = Digest::SHA256.hexdigest(url)
10
38
  filename = "cache/#{url_hash}"
11
39
  if from_cache && File.file?(filename)
12
- result = open(filename)
40
+ result = open(filename).read
13
41
  puts "Gotten #{filename} from cache"
14
42
  else
15
- result = open(url)
16
- File.write(filename, result.read)
43
+ result = open(url).read
44
+ File.write(filename, result)
17
45
  puts "Written cache file #{filename}"
18
46
  end
19
- return result.read
47
+ return result
20
48
  end
21
49
 
22
50
  def initialize html: nil
23
51
  self.html = html
24
52
  end
25
53
 
26
- # helper for seamless initialisation no matter what starting point
54
+ # helpers for seamless initialisation no matter what starting point
55
+
27
56
  def self.by_url(url)
28
57
  self.new(html: Nokogiri::HTML(get_page(url)))
29
58
  end
30
59
 
31
- # helper for seamless initialisation no matter what starting point
32
60
  def self.by_html_string(html_string)
33
61
  self.new(html: Nokogiri::HTML(html_string))
34
62
  end
35
63
 
36
- # helper for seamless initialisation no matter what starting point
37
64
  def self.by_html(html)
38
65
  self.new(html: html)
39
66
  end
40
67
 
68
+
69
+ # Returns a collection of pages based on a selector.
70
+ # Use to collect a collection of elements from a page.
41
71
  def collection_by_selector(selector)
42
72
  self.html.css(selector).map{|item| WebscraperFramework::Page.by_html(item)}
43
73
  end
@@ -1,10 +1,7 @@
1
-
2
-
3
1
  module WebscraperFramework
4
2
 
5
3
  end
6
4
 
7
-
8
5
  require "webscraper_framework/page"
9
6
  require "webscraper_framework/base_model"
10
7
  require "webscraper_framework/application"
data/views/home.html.haml CHANGED
@@ -1 +1 @@
1
- %p BODY
1
+ %p
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscraper_framework
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.733
4
+ version: 0.1.734
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene van Pelt