webscraper_framework 0.1.723 → 0.1.724

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 869eba83d5a2c0319d58c0bcba836b8ca028167f
4
- data.tar.gz: 4d59b040d98b7ac65b99638666b8171a60db0add
3
+ metadata.gz: 242e3ed4c356854b10936f265d2dc8da7996b6b0
4
+ data.tar.gz: 7bec4b43bddbd925338b51f74bb8900a0639f80f
5
5
  SHA512:
6
- metadata.gz: 73e0a6195fab218b40abb8de079fe70a4f84d5a6d59c0c4aaf67b698a90e74a9e2e861a47e429140ea5a0cb6bdd1cd3842404b854d5fef268a4c4aeb6e6d2b81
7
- data.tar.gz: 508b3c5008ce778fb474cd656d061cd2bb66f0a4e722480589154e6ed46da7aea7c4ce4941b9f9f8702882127f0847072251bc67fc8ad59ee2cd4366e76370bd
6
+ metadata.gz: d36f595bf54d918a1e62ad32127b4a4ee4ebb2709f6b22d89c8544958b7739f245c9ccea111f7c4a8f5448040385989a5606230a7176fb7af4dbdbce27afbd3a
7
+ data.tar.gz: 5140c0a076918482138e53589b9b418899fc4167905920b8ae13c776a69a7e6ffe4b8e4e0e27863bcbe5a63e91ef8745dde567df941ff145e83e5261dd79b99c
@@ -1,7 +1,5 @@
1
-
2
1
  module WebscraperFramework
3
2
 
4
-
5
3
  end
6
4
 
7
5
 
@@ -9,3 +7,4 @@ require "webscraper_framework/application"
9
7
  require "webscraper_framework/base"
10
8
  require "webscraper_framework/base_model"
11
9
  require "webscraper_framework/cli"
10
+ require "webscraper_framework/page"
@@ -0,0 +1,46 @@
1
require "digest"
require "open-uri"

module WebscraperFramework

  # Thin wrapper around a parsed HTML document or fragment.
  #
  # Instances hold the parsed node in +html+; the class-level helpers build a
  # Page from a URL, from a raw HTML string, or from an already parsed node.
  # Fetched page bodies are cached on disk, keyed by the SHA256 of the URL.
  #
  # NOTE(review): Nokogiri is referenced below but not required in this file;
  # presumably it is loaded elsewhere in the gem — confirm.
  class Page

    # Directory (relative to the current working directory) where fetched
    # page bodies are stored.
    CACHE_DIR = "cache".freeze

    attr_accessor :html

    # Returns the body found at +url+ as a String.
    #
    # When +from_cache+ is truthy and a cache file for the URL exists, the
    # cached body is returned. Otherwise the URL is fetched, the body is
    # written to the cache file and then returned.
    #
    # @param url [String] location to fetch
    # @param from_cache [Boolean] whether an existing cache file may be used
    # @return [String] the page body
    def self.get_page(url, from_cache = true)
      filename = "#{CACHE_DIR}/#{Digest::SHA256.hexdigest(url)}"

      if from_cache && File.file?(filename)
        # File.read closes the handle again, unlike a bare open(...).
        body = File.read(filename)
        puts "Gotten #{filename} from cache"
      else
        # Read the stream exactly once: a second read on the same IO returns
        # an empty string, which is what callers used to get on a fresh fetch.
        #
        # NOTE(review): URI.open falls back to Kernel#open for strings that do
        # not look like a URL, so a value starting with "|" would run a
        # command. Only pass trusted URLs here.
        body = URI.open(url, &:read)
        # The cache directory may not exist on the first run.
        Dir.mkdir(CACHE_DIR) unless Dir.exist?(CACHE_DIR)
        File.write(filename, body)
        puts "Written cache file #{filename}"
      end

      body
    end

    # @param html [Object, nil] parsed document or node to wrap
    def initialize(html: nil)
      self.html = html
    end

    # Builds a Page by fetching +url+ (through the cache) and parsing the
    # body with Nokogiri::HTML.
    def self.by_url(url)
      new(html: Nokogiri::HTML(get_page(url)))
    end

    # Builds a Page by parsing +html_string+ with Nokogiri::HTML.
    def self.by_html_string(html_string)
      new(html: Nokogiri::HTML(html_string))
    end

    # Builds a Page around an already parsed +html+ object, stored as-is.
    def self.by_html(html)
      new(html: html)
    end

    # Returns one Page per element matched by +selector+.
    #
    # Calls +css(selector)+ on the wrapped +html+ object and wraps every
    # returned item in its own Page.
    #
    # @param selector [String] selector handed to +html.css+
    # @return [Array<Page>]
    def collection_by_selector(selector)
      html.css(selector).map { |item| Page.by_html(item) }
    end

  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscraper_framework
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.723
4
+ version: 0.1.724
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene van Pelt
@@ -108,6 +108,7 @@ files:
108
108
  - lib/webscraper_framework/base.rb
109
109
  - lib/webscraper_framework/base_model.rb
110
110
  - lib/webscraper_framework/cli.rb
111
+ - lib/webscraper_framework/page.rb
111
112
  - views/base.html.haml
112
113
  - views/home.html.haml
113
114
  - views/model.html.haml