webscraper_framework 0.1.723 → 0.1.724

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 869eba83d5a2c0319d58c0bcba836b8ca028167f
4
- data.tar.gz: 4d59b040d98b7ac65b99638666b8171a60db0add
3
+ metadata.gz: 242e3ed4c356854b10936f265d2dc8da7996b6b0
4
+ data.tar.gz: 7bec4b43bddbd925338b51f74bb8900a0639f80f
5
5
  SHA512:
6
- metadata.gz: 73e0a6195fab218b40abb8de079fe70a4f84d5a6d59c0c4aaf67b698a90e74a9e2e861a47e429140ea5a0cb6bdd1cd3842404b854d5fef268a4c4aeb6e6d2b81
7
- data.tar.gz: 508b3c5008ce778fb474cd656d061cd2bb66f0a4e722480589154e6ed46da7aea7c4ce4941b9f9f8702882127f0847072251bc67fc8ad59ee2cd4366e76370bd
6
+ metadata.gz: d36f595bf54d918a1e62ad32127b4a4ee4ebb2709f6b22d89c8544958b7739f245c9ccea111f7c4a8f5448040385989a5606230a7176fb7af4dbdbce27afbd3a
7
+ data.tar.gz: 5140c0a076918482138e53589b9b418899fc4167905920b8ae13c776a69a7e6ffe4b8e4e0e27863bcbe5a63e91ef8745dde567df941ff145e83e5261dd79b99c
@@ -1,7 +1,5 @@
1
-
2
1
  module WebscraperFramework
3
2
 
4
-
5
3
  end
6
4
 
7
5
 
@@ -9,3 +7,4 @@ require "webscraper_framework/application"
9
7
  require "webscraper_framework/base"
10
8
  require "webscraper_framework/base_model"
11
9
  require "webscraper_framework/cli"
10
+ require "webscraper_framework/page"
@@ -0,0 +1,46 @@
1
# Standard-library dependencies of this file. Nokogiri is used below but is
# not required here -- presumably the gem loads it elsewhere; verify.
require "digest"
require "fileutils"
require "open-uri"

module WebscraperFramework
  # Wraps a parsed HTML node (or whole document) and offers helpers to build
  # pages from a URL, from an HTML string, or from an already parsed node.
  class Page
    # Directory (relative to the current working directory) holding cached
    # page bodies, one file per URL.
    CACHE_DIR = "cache"

    # The wrapped document/node. Only `#css` is called on it by this class.
    attr_accessor :html

    # Fetches the body for +url+, going through an on-disk cache.
    #
    # The cache file name is the SHA-256 hex digest of +url+ inside CACHE_DIR.
    #
    # NOTE(review): strings that do not look like a URI fall through to
    # Kernel#open inside URI.open (local paths, and "|cmd" pipes). Do not pass
    # untrusted strings here without validating the scheme first.
    #
    # @param url [String] the address to fetch
    # @param from_cache [Boolean] when true, an existing cache file is
    #   returned instead of fetching again; when false the page is always
    #   fetched and the cache file is overwritten
    # @return [String] the page body
    def self.get_page(url, from_cache = true)
      url_hash = Digest::SHA256.hexdigest(url)
      filename = "#{CACHE_DIR}/#{url_hash}"

      if from_cache && File.file?(filename)
        puts "Gotten #{filename} from cache"
        return File.read(filename)
      end

      # Read the stream exactly once and keep the body: a second #read on the
      # same handle returns "" because the stream is already at EOF. The block
      # form also closes the handle.
      body = URI.open(url, &:read)

      # The cache directory may not exist on the first run.
      FileUtils.mkdir_p(File.dirname(filename))
      File.write(filename, body)
      puts "Written cache file #{filename}"
      body
    end

    # @param html [Object, nil] parsed document or node to wrap
    def initialize(html: nil)
      self.html = html
    end

    # Builds a Page by fetching +url+ (through the cache) and parsing the
    # body with Nokogiri.
    #
    # @param url [String]
    # @return [Page]
    def self.by_url(url)
      new(html: Nokogiri::HTML(get_page(url)))
    end

    # Builds a Page by parsing a raw HTML string with Nokogiri.
    #
    # @param html_string [String]
    # @return [Page]
    def self.by_html_string(html_string)
      new(html: Nokogiri::HTML(html_string))
    end

    # Builds a Page around an already parsed document or node.
    #
    # @param html [Object] parsed document or node
    # @return [Page]
    def self.by_html(html)
      new(html: html)
    end

    # Runs a CSS selector against the wrapped node and wraps every match in
    # its own Page.
    #
    # @param selector [String] CSS selector handed to `html.css`
    # @return [Array<Page>]
    def collection_by_selector(selector)
      html.css(selector).map { |item| Page.by_html(item) }
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscraper_framework
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.723
4
+ version: 0.1.724
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rene van Pelt
@@ -108,6 +108,7 @@ files:
108
108
  - lib/webscraper_framework/base.rb
109
109
  - lib/webscraper_framework/base_model.rb
110
110
  - lib/webscraper_framework/cli.rb
111
+ - lib/webscraper_framework/page.rb
111
112
  - views/base.html.haml
112
113
  - views/home.html.haml
113
114
  - views/model.html.haml