jazzez 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +152 -0
- data/doc/README.txt +152 -0
- data/jazzez.rb +162 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
Documentation for Jazzez Version 1.1.1 gem:
|
3
|
+
|
4
|
+
|
5
|
+
1. Get the links from URL
|
6
|
+
|
7
|
+
Ex.
|
8
|
+
|
9
|
+
require 'jazzez'
|
10
|
+
output= Jazzez.new
|
11
|
+
puts output.links("google.com")
|
12
|
+
|
13
|
+
|
14
|
+
Output:
|
15
|
+
|
16
|
+
http://images.google.com/imghp?hl=en&tab=wi
|
17
|
+
http://maps.google.com/maps?hl=en&tab=wl
|
18
|
+
http://news.google.com/nwshp?hl=en&tab=wn
|
19
|
+
http://video.google.com/?hl=en&tab=wv
|
20
|
+
http://mail.google.com/mail/?hl=en&tab=wm
|
21
|
+
http://www.google.com/intl/en/options/
|
22
|
+
https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
|
23
|
+
http://google.com/advanced_search?hl=en
|
24
|
+
http://google.com/preferences?hl=en
|
25
|
+
http://google.com/language_tools?hl=en
|
26
|
+
http://google.com/intl/en/ads/
|
27
|
+
http://google.com/services/
|
28
|
+
http://google.com/intl/en/about.html
|
29
|
+
http://www.google.com/ncr
|
30
|
+
http://google.com/intl/en/privacy.html
|
31
|
+
|
32
|
+
Usage:
|
33
|
+
|
34
|
+
1. Get the URL from User.
|
35
|
+
2. Make sure to check whether it is valid or not.
|
36
|
+
3. If it is valid, then get the source code for that page with the help of Mechanize gem.
|
37
|
+
4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
|
38
|
+
5. If an href value does not contain a domain, prepend the homepage URL to it (homepage + href value).
|
39
|
+
6. return the results to User as an array
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
2. Get the Second level links
|
45
|
+
|
46
|
+
|
47
|
+
Ex.
|
48
|
+
|
49
|
+
require 'jazzez'
|
50
|
+
output= Jazzez.new
|
51
|
+
puts output.links_level2("google.com")
|
52
|
+
|
53
|
+
|
54
|
+
Output:
|
55
|
+
|
56
|
+
It gives the Second level outputs.
|
57
|
+
|
58
|
+
If you want to see the output of this code then just go to http://jazzez.wordpress.com
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
3. Get the Html tags
|
63
|
+
|
64
|
+
|
65
|
+
Ex.
|
66
|
+
|
67
|
+
require 'jazzez'
|
68
|
+
output= Jazzez.new
|
69
|
+
puts output.tagdetails("google.com")
|
70
|
+
|
71
|
+
|
72
|
+
Output:
|
73
|
+
|
74
|
+
1<html tag(s)
|
75
|
+
1</html> tag(s)
|
76
|
+
1<head tag(s)
|
77
|
+
1</head> tag(s)
|
78
|
+
1<body tag(s)
|
79
|
+
1</body> tag(s)
|
80
|
+
2<table tag(s)
|
81
|
+
2</table> tag(s)
|
82
|
+
3<tr tag(s)
|
83
|
+
3</tr> tag(s)
|
84
|
+
9<td tag(s)
|
85
|
+
9</td> tag(s)
|
86
|
+
0<th tag(s)
|
87
|
+
0</th> tag(s)
|
88
|
+
0<l tag(s)
|
89
|
+
0</l> tag(s)
|
90
|
+
0<link tag(s)
|
91
|
+
1<p tag(s)
|
92
|
+
1</p> tag(s)
|
93
|
+
4<div tag(s)
|
94
|
+
4</div> tag(s)
|
95
|
+
0<span tag(s)
|
96
|
+
0</span> tag(s)
|
97
|
+
4<script tag(s)
|
98
|
+
4</script> tag(s)
|
99
|
+
0<ul tag(s)
|
100
|
+
0</ul> tag(s)
|
101
|
+
0<ol tag(s)
|
102
|
+
0</ol> tag(s)
|
103
|
+
16<a tag(s)
|
104
|
+
15</a> tag(s)
|
105
|
+
0<h1 tag(s)
|
106
|
+
0</h1> tag(s)
|
107
|
+
0<h2 tag(s)
|
108
|
+
0</h2> tag(s)
|
109
|
+
0<h3 tag(s)
|
110
|
+
0</h3> tag(s)
|
111
|
+
0<h4 tag(s)
|
112
|
+
0</h4> tag(s)
|
113
|
+
0<h5 tag(s)
|
114
|
+
0</h5> tag(s)
|
115
|
+
0<h6 tag(s)
|
116
|
+
0</h6> tag(s)
|
117
|
+
4<font tag(s)
|
118
|
+
4</font> tag(s)
|
119
|
+
0<select tag(s)
|
120
|
+
0</select> tag(s)
|
121
|
+
0<option tag(s)
|
122
|
+
0</option> tag(s)
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
Usage:
|
127
|
+
|
128
|
+
Easy to answer the below questions
|
129
|
+
|
130
|
+
How many tables in your code ?
|
131
|
+
How many table rows/columns in your code ?
|
132
|
+
How Many div tags opened and how many div tags closed ?
|
133
|
+
Are you sure your html tags were properly closed ?
|
134
|
+
|
135
|
+
More functions available in next version.
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
For any queries, just send a mail to jazzezravi@gmail.com.
|
140
|
+
|
141
|
+
|
142
|
+
Thanks,
|
143
|
+
P.Raveendran
|
144
|
+
http://raveendran.wordpress.com
|
145
|
+
http://jazzez.wordpress.com
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
data/doc/README.txt
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
Documentation for Jazzez Version 1.1.1 gem:
|
3
|
+
|
4
|
+
|
5
|
+
1. Get the links from URL
|
6
|
+
|
7
|
+
Ex.
|
8
|
+
|
9
|
+
require 'jazzez'
|
10
|
+
output= Jazzez.new
|
11
|
+
puts output.links("google.com")
|
12
|
+
|
13
|
+
|
14
|
+
Output:
|
15
|
+
|
16
|
+
http://images.google.com/imghp?hl=en&tab=wi
|
17
|
+
http://maps.google.com/maps?hl=en&tab=wl
|
18
|
+
http://news.google.com/nwshp?hl=en&tab=wn
|
19
|
+
http://video.google.com/?hl=en&tab=wv
|
20
|
+
http://mail.google.com/mail/?hl=en&tab=wm
|
21
|
+
http://www.google.com/intl/en/options/
|
22
|
+
https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
|
23
|
+
http://google.com/advanced_search?hl=en
|
24
|
+
http://google.com/preferences?hl=en
|
25
|
+
http://google.com/language_tools?hl=en
|
26
|
+
http://google.com/intl/en/ads/
|
27
|
+
http://google.com/services/
|
28
|
+
http://google.com/intl/en/about.html
|
29
|
+
http://www.google.com/ncr
|
30
|
+
http://google.com/intl/en/privacy.html
|
31
|
+
|
32
|
+
Usage:
|
33
|
+
|
34
|
+
1. Get the URL from User.
|
35
|
+
2. Make sure to check whether it is valid or not.
|
36
|
+
3. If it is valid, then get the source code for that page with the help of Mechanize gem.
|
37
|
+
4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
|
38
|
+
5. If an href value does not contain a domain, prepend the homepage URL to it (homepage + href value).
|
39
|
+
6. return the results to User as an array
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
2. Get the Second level links
|
45
|
+
|
46
|
+
|
47
|
+
Ex.
|
48
|
+
|
49
|
+
require 'jazzez'
|
50
|
+
output= Jazzez.new
|
51
|
+
puts output.links_level2("google.com")
|
52
|
+
|
53
|
+
|
54
|
+
Output:
|
55
|
+
|
56
|
+
It gives the Second level outputs.
|
57
|
+
|
58
|
+
If you want to see the output of this code then just go to http://jazzez.wordpress.com
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
3. Get the Html tags
|
63
|
+
|
64
|
+
|
65
|
+
Ex.
|
66
|
+
|
67
|
+
require 'jazzez'
|
68
|
+
output= Jazzez.new
|
69
|
+
puts output.tagdetails("google.com")
|
70
|
+
|
71
|
+
|
72
|
+
Output:
|
73
|
+
|
74
|
+
1<html tag(s)
|
75
|
+
1</html> tag(s)
|
76
|
+
1<head tag(s)
|
77
|
+
1</head> tag(s)
|
78
|
+
1<body tag(s)
|
79
|
+
1</body> tag(s)
|
80
|
+
2<table tag(s)
|
81
|
+
2</table> tag(s)
|
82
|
+
3<tr tag(s)
|
83
|
+
3</tr> tag(s)
|
84
|
+
9<td tag(s)
|
85
|
+
9</td> tag(s)
|
86
|
+
0<th tag(s)
|
87
|
+
0</th> tag(s)
|
88
|
+
0<l tag(s)
|
89
|
+
0</l> tag(s)
|
90
|
+
0<link tag(s)
|
91
|
+
1<p tag(s)
|
92
|
+
1</p> tag(s)
|
93
|
+
4<div tag(s)
|
94
|
+
4</div> tag(s)
|
95
|
+
0<span tag(s)
|
96
|
+
0</span> tag(s)
|
97
|
+
4<script tag(s)
|
98
|
+
4</script> tag(s)
|
99
|
+
0<ul tag(s)
|
100
|
+
0</ul> tag(s)
|
101
|
+
0<ol tag(s)
|
102
|
+
0</ol> tag(s)
|
103
|
+
16<a tag(s)
|
104
|
+
15</a> tag(s)
|
105
|
+
0<h1 tag(s)
|
106
|
+
0</h1> tag(s)
|
107
|
+
0<h2 tag(s)
|
108
|
+
0</h2> tag(s)
|
109
|
+
0<h3 tag(s)
|
110
|
+
0</h3> tag(s)
|
111
|
+
0<h4 tag(s)
|
112
|
+
0</h4> tag(s)
|
113
|
+
0<h5 tag(s)
|
114
|
+
0</h5> tag(s)
|
115
|
+
0<h6 tag(s)
|
116
|
+
0</h6> tag(s)
|
117
|
+
4<font tag(s)
|
118
|
+
4</font> tag(s)
|
119
|
+
0<select tag(s)
|
120
|
+
0</select> tag(s)
|
121
|
+
0<option tag(s)
|
122
|
+
0</option> tag(s)
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
Usage:
|
127
|
+
|
128
|
+
Easy to answer the below questions
|
129
|
+
|
130
|
+
How many tables in your code ?
|
131
|
+
How many table rows/columns in your code ?
|
132
|
+
How Many div tags opened and how many div tags closed ?
|
133
|
+
Are you sure your html tags were properly closed ?
|
134
|
+
|
135
|
+
More functions available in next version.
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
For any queries, just send a mail to jazzezravi@gmail.com.
|
140
|
+
|
141
|
+
|
142
|
+
Thanks,
|
143
|
+
P.Raveendran
|
144
|
+
http://raveendran.wordpress.com
|
145
|
+
http://jazzez.wordpress.com
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
data/jazzez.rb
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'rubygems'
require 'uri'
|
2
|
+
|
3
|
+
# Jazzez collects the links found on a web page (one or two levels deep) and
# counts occurrences of common HTML tags in a page's source. Pages are
# fetched with the Mechanize gem, which is loaded lazily by #create_agent.
class Jazzez
  # Tag fragments counted by #tagdetails, in report order. Each entry is
  # echoed verbatim into the result strings, so the text (including the
  # trailing space in "<l ") must not be altered.
  TAG_SEARCH_TERMS = [
    "<html", "</html>", "<head", "</head>", "<body", "</body>",
    "<table", "</table>", "<tr", "</tr>", "<td", "</td>", "<th", "</th>",
    "<l ", "</l>", "<link", "<p", "</p>", "<div", "</div>",
    "<span", "</span>", "<script", "</script>", "<ul", "</ul>",
    "<ol", "</ol>", "<a", "</a>", "<h1", "</h1>", "<h2", "</h2>",
    "<h3", "</h3>", "<h4", "</h4>", "<h5", "</h5>", "<h6", "</h6>",
    "<font", "</font>", "<select", "</select>", "<option", "</option>"
  ].freeze

  # Raised (as RuntimeError) when the supplied URL cannot be a real address.
  INVALID_URL_MESSAGE = "The given URL is not a valid one.Please provide a valid URL"

  # Raised (as RuntimeError) by #links when the page cannot be fetched.
  LINKS_FETCH_MESSAGE = "Oops. Something went wrong.\n" \
                        "1. Check whether the given URL is valid or not.\n" \
                        "2. Check your internet connection.\n" \
                        "Try again now.."

  # Raised (as RuntimeError) by #links when the page yields no links.
  NO_LINKS_MESSAGE = "Oops. Something went wrong. Check the given URL have any links inside or not"

  # Raised (as RuntimeError) by #tagdetails when the page cannot be fetched.
  TAGS_FETCH_MESSAGE = "oops. Something went wrong.\n" \
                       "1. Check the given URL is valid or not.\n" \
                       "2. Check your internet connection"

  # Normalizes +url+ and remembers it for the following fetch.
  #
  # Sets @url (the URL with a scheme, "http://" being added when the input
  # has neither http:// nor https://) and @homepage (scheme + host).
  # The scheme test looks for a real "http://" / "https://" prefix, so a host
  # that merely begins with "http" (e.g. "httpbin.org") is handled correctly,
  # and an https URL keeps https in its homepage.
  #
  # Returns the homepage String.
  # Raises RuntimeError when no host can be extracted from the URL.
  def check_http(url)
    @url = url.to_s.strip
    # Default scheme used when the caller supplied none.
    @http = "http://"
    @url = @http + @url unless @url =~ %r{\Ahttps?://}i
    scheme = @url[%r{\Ahttps?://}i]
    # "scheme://host/path".split('/') => ["scheme:", "", "host", "path"]
    host = @url.split('/')[2]
    raise INVALID_URL_MESSAGE if host.nil? || host.empty?
    @homepage = scheme + host
  end

  # Loads the Mechanize gem and stores a fresh agent in @agent.
  #
  # Mechanize releases before 1.0 expose the class as WWW::Mechanize; later
  # releases expose it as Mechanize, so whichever is defined is used.
  #
  # Returns the new agent.
  def create_agent
    require 'mechanize'
    agent_class = defined?(::Mechanize) ? ::Mechanize : ::WWW::Mechanize
    @agent = agent_class.new
  end

  # Rejects URLs shorter than four characters (ignoring surrounding blanks).
  # nil and other non-String values are treated as empty input.
  #
  # Returns nil when the URL is long enough.
  # Raises RuntimeError otherwise.
  def check_URL_length(url)
    raise INVALID_URL_MESSAGE if url.to_s.strip.length < 4
  end

  # Returns an Array of absolute link Strings found on the page at +url+.
  # The result is also stored in @level0.
  #
  # Raises RuntimeError when the URL is invalid, the page cannot be fetched,
  # or the page contains no links.
  def links(url)
    check_URL_length(url)
    check_http(url)
    create_agent
    @level0 = []
    page = fetch_page
    raise LINKS_FETCH_MESSAGE if page.nil?
    @level0 = collect_links(page)
    @empty = @level0.empty?
    raise NO_LINKS_MESSAGE if @empty
    @level0
  end

  # Lenient variant of #links used for the second crawl level: a page that
  # cannot be fetched, or that has no links, produces an empty Array instead
  # of an error. The result is also stored in @level0.
  def levels(url)
    check_http(url)
    create_agent
    page = fetch_page
    @level0 = page.nil? ? [] : collect_links(page)
  end

  # Crawls every entry of +links+ that lives on the same host as the current
  # @url and appends each crawl result to @final_output. Each distinct link
  # is fetched only once.
  #
  # Returns the +links+ Array it was given.
  def array_links(links)
    @final_output = []
    @arraylinks = links
    # Captured up front because #levels overwrites @url with each crawled link.
    base_host = @url.split('/')[2]
    @arraylinks.uniq.each do |link|
      next unless link.split('/')[2] == base_host
      levels(link)
      @final_output << @level0
    end
    @arraylinks
  end

  # Saves the current @level0 links into @level1_output.
  def backup
    @level1_output ||= []
    @level1_output << @level0
  end

  # Returns a sorted Array of unique links made of the links on +url+ plus
  # the links found on each same-host page those links point to.
  #
  # Raises RuntimeError under the same conditions as #links.
  def links_level2(url)
    links(url)
    @level1_output = []
    backup
    array_links(@level0)
    @final_output = @final_output.flatten.uniq
    @level1_output << @final_output
    @level1_output = @level1_output.flatten.uniq
    @level1_output.sort
  end

  # Counts the tags listed in TAG_SEARCH_TERMS in the body of the page at
  # +url+ (case-insensitively).
  #
  # Returns an Array of Strings of the form "<count><term> tag(s)", one per
  # term, in TAG_SEARCH_TERMS order.
  # Raises RuntimeError when the URL is invalid or the page cannot be fetched.
  def tagdetails(url)
    check_URL_length(url)
    check_http(url)
    create_agent
    page = fetch_page
    raise TAGS_FETCH_MESSAGE if page.nil?
    source = page.body.to_s.downcase
    TAG_SEARCH_TERMS.map do |term|
      count_tag(source, term).to_s + term + " tag(s)"
    end
  end

  private

  # Fetches @url with the current agent; returns nil when the request fails.
  def fetch_page
    @agent.get(@url)
  rescue StandardError
    nil
  end

  # Extracts absolute link Strings from +page+. Responses without a #links
  # method (for example non-HTML downloads) yield an empty Array, and links
  # whose URI cannot be read are skipped.
  def collect_links(page)
    return [] unless page.respond_to?(:links)
    anchors = page.links
    return [] if anchors.nil?
    collected = []
    anchors.each do |anchor|
      begin
        href = anchor.uri.to_s
      rescue StandardError
        next
      end
      collected << absolute_href(href)
    end
    collected
  end

  # Resolves +href+ against the current page URL. Absolute http(s) links are
  # returned untouched; if resolution fails the homepage is prepended, with a
  # "/" inserted when the href lacks one.
  def absolute_href(href)
    return href if href =~ %r{\Ahttps?://}i
    URI.join(@url, href).to_s
  rescue URI::Error, ArgumentError
    separator = href[0, 1] == "/" ? "" : "/"
    @homepage + separator + href
  end

  # Counts occurrences of +term+ in the lower-cased +source+. Closing tags
  # are matched literally; opening tags must not be followed by another tag
  # name character, so "<a" does not count "<abbr" and "<p" does not count
  # "<pre".
  def count_tag(source, term)
    return source.scan(term).length if term[0, 2] == "</"
    pattern = Regexp.new(Regexp.escape(term.strip) + '(?![\w:-])')
    source.scan(pattern).length
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
4
|
+
name: jazzez
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.1.1
|
7
|
+
date: 2009-04-14 00:00:00 +05:30
|
8
|
+
summary: Get Links,level 2 links and Tag details from URL
|
9
|
+
require_paths:
|
10
|
+
- .
|
11
|
+
email: jazzezravi@gmail.com
|
12
|
+
homepage: http://jazzez.wordpress.com/
|
13
|
+
rubyforge_project: jazzez
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Jazzezravi
|
31
|
+
files:
|
32
|
+
- jazzez.rb
|
33
|
+
- doc/README.txt
|
34
|
+
- README
|
35
|
+
test_files: []
|
36
|
+
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README
|
41
|
+
- doc/README.txt
|
42
|
+
executables: []
|
43
|
+
|
44
|
+
extensions: []
|
45
|
+
|
46
|
+
requirements: []
|
47
|
+
|
48
|
+
dependencies:
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: mechanize
|
51
|
+
version_requirement:
|
52
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.7.5
|
57
|
+
version:
|