slmndr 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +2 -2
  3. data/lib/slmndr.rb +15 -27
  4. data.tar.gz.sig +0 -0
  5. metadata +86 -4
  6. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20770bf44914d9179137aee64b3f86626b260d57
4
- data.tar.gz: dd6067dd15692a27fa5216ab67cbe0b7321eaacc
3
+ metadata.gz: 2ebd74e5e1f4bf29b4b8a01362c13eb8196e4ff5
4
+ data.tar.gz: 6053b2e0ec26ba5dd0df380060dd60bcf3a45185
5
5
  SHA512:
6
- metadata.gz: 2c7d1723c78474eec28e2f7f22e5b19b601fc5903fb09bec22aa80c6bd266e8260608bd0b7a4ca11e3c012a5bb84ead4c1475b8426cfed46335657c474bea190
7
- data.tar.gz: f12be9d14cb2f5d4f1813cb09c77fd3e207ce6268f9e8f12e4caac8ebba01ca9e8e57ba77e3614dd46a616a779d76764cc3e6f182ae5652d880e4240657f1730
6
+ metadata.gz: 77db1e9e5d4d8ba8e2ef70d1ccd87e4220f95a06e093405e43cd29d0f8f6ca8c9643d2872e10eb5ce38496796c650bcd0b0118c7a963dc363c05fa5b91ea5c18
7
+ data.tar.gz: 88ba03da2130c604382f9db2690da8542cf87d8e03c036823afc4dd89f0885a68f192827e5d6a6cc024736cc6a89289b605f54569b798aa11fbd21352ef27da9
checksums.yaml.gz.sig CHANGED
@@ -1,2 +1,2 @@
1
- "�|0@�U}vcƥ�Q��=:��@};}N@�}���p_l7����.�� �O�W�yS�jJ���[��7�7�I�s�H�C����/��P���8�U���HQ��ApO^s���S�s|�җ�*&
2
- �T���,�fb��t
1
+
2
+ e]�
data/lib/slmndr.rb CHANGED
@@ -1,5 +1,5 @@
1
- ## Salamander: A minimalistic ruby web crawling framework.
2
- ## Authored by: John Lawrence M. Penafiel
1
+ # Salamander: A minimalistic ruby web crawling framework.
2
+ # Authored by: John Lawrence M. Penafiel
3
3
 
4
4
  require 'time'
5
5
  require 'thread'
@@ -12,21 +12,12 @@ require 'open_uri_redirections'
12
12
  require 'nokogiri'
13
13
  require 'addressable/uri'
14
14
 
15
- ## Module
16
- ## Salamander
17
- ## Description
18
- ## The Crawler module provides an easy way for the other components of the Salamander system to perform crawling.
19
- ## Functions
20
- ## Salamander::crawl
15
+ # The module containing the Salamander framework itself.
21
16
  module Salamander
22
17
 
23
- ## Function
24
- ## get_links
25
- ## Description
26
- ## Extracts outgoing links from the HTML pointed to by the given URL string.
27
- ## Parameters
28
- ## url - The URL of the HTML page the function is extracting links from.
29
- ## html - The HTML data to extract links from.
18
+ # Extracts outgoing links from the HTML pointed to by the given URL string.
19
+ # @param url The URL of the HTML page the function is extracting links from.
20
+ # @param html The HTML data to extract links from.
30
21
  def self.get_links(url, html)
31
22
  # Initialize
32
23
  uri = Addressable::URI.parse(url)
@@ -63,18 +54,15 @@ module Salamander
63
54
  end
64
55
  end
65
56
 
66
- ## Function
67
- ## crawl
68
- ## Description
69
- ## Performs a restricted, unauthenticated, breadth-first crawl of the target web asset.
70
- ## Function blocks until all threads terminate.
71
- ## Parameters
72
- ## urls - Required. A list of strings containing the seed URLs.
73
- ## args - Optional. Default: {}. A hash containing optional arguments for the function.
74
- ## visit - Optional. Default: nil. A lambda which accepts a URL, and returns a boolean which tells the crawler if the URL should be visited.
75
- ## delay - Optional. Default: 1. A positive float indicating the number of seconds between requests in one thread.
76
- ## threads - Optional. Default: 1. A positive integer indicating the number of allowed simultaneous requests to the target web asset.
77
- ## agent - Optional. Default: "Mozilla/5.0 (MSIE 9.0; Windows NT 6.1; Trident/5.0)". The user-agent string to be used.
57
+ # Performs a restricted, unauthenticated, breadth-first crawl of the target web asset.
58
+ # Function blocks until all threads terminate.
59
+ # Optional Arguments (Place these inside the 'args' hash)
60
+ # visit: A lambda which accepts a URL, and returns a boolean which tells the crawler if the URL should be visited.
61
+ # delay: A positive float indicating the number of seconds between requests in one thread. Defaults to 1.
62
+ # threads: A positive integer indicating the number of allowed simultaneous requests to the target web asset. Defaults to 1.
63
+ # agent: The user-agent string to be used. Defaults to "Mozilla/5.0 (MSIE 9.0; Windows NT 6.1; Trident/5.0)".
64
+ # @param urls A list of strings containing the seed URLs.
65
+ # @param args A hash containing optional arguments for the function.
78
66
  def crawl(urls, args = {})
79
67
  # Get arguments
80
68
  visit = nil
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slmndr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Lawrence M. Penafiel
@@ -30,9 +30,91 @@ cert_chain:
30
30
  kX+zehuhNK2jecNBpCmYOdpV/Tf9rA2qQ+TFBx08FfsibhdjbvXI1oN2uv+KBeAi
31
31
  1ixqHDxvPm+/VQAK6wyHVbo6smzss/cry1yw2JTa6dk=
32
32
  -----END CERTIFICATE-----
33
- date: 2015-06-29 00:00:00.000000000 Z
34
- dependencies: []
35
- description: A minimalistic ruby web crawling framework
33
+ date: 2015-06-30 00:00:00.000000000 Z
34
+ dependencies:
35
+ - !ruby/object:Gem::Dependency
36
+ name: json
37
+ requirement: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.8'
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 1.8.3
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - "~>"
50
+ - !ruby/object:Gem::Version
51
+ version: '1.8'
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.8.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: open_uri_redirections
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 0.2.1
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - "~>"
70
+ - !ruby/object:Gem::Version
71
+ version: '0.2'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.2.1
75
+ - !ruby/object:Gem::Dependency
76
+ name: nokogiri
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.6'
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: 1.6.6.2
85
+ type: :runtime
86
+ prerelease: false
87
+ version_requirements: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - "~>"
90
+ - !ruby/object:Gem::Version
91
+ version: '1.6'
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 1.6.6.2
95
+ - !ruby/object:Gem::Dependency
96
+ name: addressable
97
+ requirement: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - "~>"
100
+ - !ruby/object:Gem::Version
101
+ version: '2.3'
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: 2.3.8
105
+ type: :runtime
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '2.3'
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: 2.3.8
115
+ description: |-
116
+ A minimalistic ruby web crawling framework.
117
+ See https://github.com/penafieljlm/slmndr for more information.
36
118
  email: penafieljlm@gmail.com
37
119
  executables: []
38
120
  extensions: []
metadata.gz.sig CHANGED
Binary file