jazzez 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +152 -0
- data/doc/README.txt +152 -0
- data/jazzez.rb +162 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
Documentation for Jazzez Version 1.1.1 gem:
|
3
|
+
|
4
|
+
|
5
|
+
1. Get the links from URL
|
6
|
+
|
7
|
+
Ex.
|
8
|
+
|
9
|
+
require 'jazzez'
|
10
|
+
output= Jazzez.new
|
11
|
+
puts output.links("google.com")
|
12
|
+
|
13
|
+
|
14
|
+
Output:
|
15
|
+
|
16
|
+
http://images.google.com/imghp?hl=en&tab=wi
|
17
|
+
http://maps.google.com/maps?hl=en&tab=wl
|
18
|
+
http://news.google.com/nwshp?hl=en&tab=wn
|
19
|
+
http://video.google.com/?hl=en&tab=wv
|
20
|
+
http://mail.google.com/mail/?hl=en&tab=wm
|
21
|
+
http://www.google.com/intl/en/options/
|
22
|
+
https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
|
23
|
+
http://google.com/advanced_search?hl=en
|
24
|
+
http://google.com/preferences?hl=en
|
25
|
+
http://google.com/language_tools?hl=en
|
26
|
+
http://google.com/intl/en/ads/
|
27
|
+
http://google.com/services/
|
28
|
+
http://google.com/intl/en/about.html
|
29
|
+
http://www.google.com/ncr
|
30
|
+
http://google.com/intl/en/privacy.html
|
31
|
+
|
32
|
+
Usage:
|
33
|
+
|
34
|
+
1. Get the URL from User.
|
35
|
+
2. Make sure to check whether it is valid or not.
|
36
|
+
3. If it is valid, then get the source code for that page with the help of Mechanize gem.
|
37
|
+
4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
|
38
|
+
5. If an href value does not include a domain, prepend the homepage URL to the href value.
|
39
|
+
6. return the results to User as an array
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
2. Get the Second level links
|
45
|
+
|
46
|
+
|
47
|
+
Ex.
|
48
|
+
|
49
|
+
require 'jazzez'
|
50
|
+
output= Jazzez.new
|
51
|
+
puts output.links_level2("google.com")
|
52
|
+
|
53
|
+
|
54
|
+
Output:
|
55
|
+
|
56
|
+
It gives the Second level outputs.
|
57
|
+
|
58
|
+
If you want to see the output of this code then just go to http://jazzez.wordpress.com
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
3. Get the Html tags
|
63
|
+
|
64
|
+
|
65
|
+
Ex.
|
66
|
+
|
67
|
+
require 'jazzez'
|
68
|
+
output= Jazzez.new
|
69
|
+
puts output.tagdetails("google.com")
|
70
|
+
|
71
|
+
|
72
|
+
Output:
|
73
|
+
|
74
|
+
1<html tag(s)
|
75
|
+
1</html> tag(s)
|
76
|
+
1<head tag(s)
|
77
|
+
1</head> tag(s)
|
78
|
+
1<body tag(s)
|
79
|
+
1</body> tag(s)
|
80
|
+
2<table tag(s)
|
81
|
+
2</table> tag(s)
|
82
|
+
3<tr tag(s)
|
83
|
+
3</tr> tag(s)
|
84
|
+
9<td tag(s)
|
85
|
+
9</td> tag(s)
|
86
|
+
0<th tag(s)
|
87
|
+
0</th> tag(s)
|
88
|
+
0<l tag(s)
|
89
|
+
0</l> tag(s)
|
90
|
+
0<link tag(s)
|
91
|
+
1<p tag(s)
|
92
|
+
1</p> tag(s)
|
93
|
+
4<div tag(s)
|
94
|
+
4</div> tag(s)
|
95
|
+
0<span tag(s)
|
96
|
+
0</span> tag(s)
|
97
|
+
4<script tag(s)
|
98
|
+
4</script> tag(s)
|
99
|
+
0<ul tag(s)
|
100
|
+
0</ul> tag(s)
|
101
|
+
0<ol tag(s)
|
102
|
+
0</ol> tag(s)
|
103
|
+
16<a tag(s)
|
104
|
+
15</a> tag(s)
|
105
|
+
0<h1 tag(s)
|
106
|
+
0</h1> tag(s)
|
107
|
+
0<h2 tag(s)
|
108
|
+
0</h2> tag(s)
|
109
|
+
0<h3 tag(s)
|
110
|
+
0</h3> tag(s)
|
111
|
+
0<h4 tag(s)
|
112
|
+
0</h4> tag(s)
|
113
|
+
0<h5 tag(s)
|
114
|
+
0</h5> tag(s)
|
115
|
+
0<h6 tag(s)
|
116
|
+
0</h6> tag(s)
|
117
|
+
4<font tag(s)
|
118
|
+
4</font> tag(s)
|
119
|
+
0<select tag(s)
|
120
|
+
0</select> tag(s)
|
121
|
+
0<option tag(s)
|
122
|
+
0</option> tag(s)
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
Usage:
|
127
|
+
|
128
|
+
This makes it easy to answer questions such as:
|
129
|
+
|
130
|
+
How many tables are in your code?
|
131
|
+
How many table rows/columns are in your code?
|
132
|
+
How many div tags are opened and how many div tags are closed?
|
133
|
+
Are you sure your HTML tags are properly closed?
|
134
|
+
|
135
|
+
More functions will be available in the next version.
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
For any queries, just send an email to jazzezravi@gmail.com.
|
140
|
+
|
141
|
+
|
142
|
+
Thanks,
|
143
|
+
P.Raveendran
|
144
|
+
http://raveendran.wordpress.com
|
145
|
+
http://jazzez.wordpress.com
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
data/doc/README.txt
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
Documentation for Jazzez Version 1.1.1 gem:
|
3
|
+
|
4
|
+
|
5
|
+
1. Get the links from URL
|
6
|
+
|
7
|
+
Ex.
|
8
|
+
|
9
|
+
require 'jazzez'
|
10
|
+
output= Jazzez.new
|
11
|
+
puts output.links("google.com")
|
12
|
+
|
13
|
+
|
14
|
+
Output:
|
15
|
+
|
16
|
+
http://images.google.com/imghp?hl=en&tab=wi
|
17
|
+
http://maps.google.com/maps?hl=en&tab=wl
|
18
|
+
http://news.google.com/nwshp?hl=en&tab=wn
|
19
|
+
http://video.google.com/?hl=en&tab=wv
|
20
|
+
http://mail.google.com/mail/?hl=en&tab=wm
|
21
|
+
http://www.google.com/intl/en/options/
|
22
|
+
https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
|
23
|
+
http://google.com/advanced_search?hl=en
|
24
|
+
http://google.com/preferences?hl=en
|
25
|
+
http://google.com/language_tools?hl=en
|
26
|
+
http://google.com/intl/en/ads/
|
27
|
+
http://google.com/services/
|
28
|
+
http://google.com/intl/en/about.html
|
29
|
+
http://www.google.com/ncr
|
30
|
+
http://google.com/intl/en/privacy.html
|
31
|
+
|
32
|
+
Usage:
|
33
|
+
|
34
|
+
1. Get the URL from User.
|
35
|
+
2. Make sure to check whether it is valid or not.
|
36
|
+
3. If it is valid, then get the source code for that page with the help of Mechanize gem.
|
37
|
+
4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
|
38
|
+
5. If an href value does not include a domain, prepend the homepage URL to the href value.
|
39
|
+
6. return the results to User as an array
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
2. Get the Second level links
|
45
|
+
|
46
|
+
|
47
|
+
Ex.
|
48
|
+
|
49
|
+
require 'jazzez'
|
50
|
+
output= Jazzez.new
|
51
|
+
puts output.links_level2("google.com")
|
52
|
+
|
53
|
+
|
54
|
+
Output:
|
55
|
+
|
56
|
+
It gives the Second level outputs.
|
57
|
+
|
58
|
+
If you want to see the output of this code then just go to http://jazzez.wordpress.com
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
3. Get the Html tags
|
63
|
+
|
64
|
+
|
65
|
+
Ex.
|
66
|
+
|
67
|
+
require 'jazzez'
|
68
|
+
output= Jazzez.new
|
69
|
+
puts output.tagdetails("google.com")
|
70
|
+
|
71
|
+
|
72
|
+
Output:
|
73
|
+
|
74
|
+
1<html tag(s)
|
75
|
+
1</html> tag(s)
|
76
|
+
1<head tag(s)
|
77
|
+
1</head> tag(s)
|
78
|
+
1<body tag(s)
|
79
|
+
1</body> tag(s)
|
80
|
+
2<table tag(s)
|
81
|
+
2</table> tag(s)
|
82
|
+
3<tr tag(s)
|
83
|
+
3</tr> tag(s)
|
84
|
+
9<td tag(s)
|
85
|
+
9</td> tag(s)
|
86
|
+
0<th tag(s)
|
87
|
+
0</th> tag(s)
|
88
|
+
0<l tag(s)
|
89
|
+
0</l> tag(s)
|
90
|
+
0<link tag(s)
|
91
|
+
1<p tag(s)
|
92
|
+
1</p> tag(s)
|
93
|
+
4<div tag(s)
|
94
|
+
4</div> tag(s)
|
95
|
+
0<span tag(s)
|
96
|
+
0</span> tag(s)
|
97
|
+
4<script tag(s)
|
98
|
+
4</script> tag(s)
|
99
|
+
0<ul tag(s)
|
100
|
+
0</ul> tag(s)
|
101
|
+
0<ol tag(s)
|
102
|
+
0</ol> tag(s)
|
103
|
+
16<a tag(s)
|
104
|
+
15</a> tag(s)
|
105
|
+
0<h1 tag(s)
|
106
|
+
0</h1> tag(s)
|
107
|
+
0<h2 tag(s)
|
108
|
+
0</h2> tag(s)
|
109
|
+
0<h3 tag(s)
|
110
|
+
0</h3> tag(s)
|
111
|
+
0<h4 tag(s)
|
112
|
+
0</h4> tag(s)
|
113
|
+
0<h5 tag(s)
|
114
|
+
0</h5> tag(s)
|
115
|
+
0<h6 tag(s)
|
116
|
+
0</h6> tag(s)
|
117
|
+
4<font tag(s)
|
118
|
+
4</font> tag(s)
|
119
|
+
0<select tag(s)
|
120
|
+
0</select> tag(s)
|
121
|
+
0<option tag(s)
|
122
|
+
0</option> tag(s)
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
Usage:
|
127
|
+
|
128
|
+
This makes it easy to answer questions such as:
|
129
|
+
|
130
|
+
How many tables are in your code?
|
131
|
+
How many table rows/columns are in your code?
|
132
|
+
How many div tags are opened and how many div tags are closed?
|
133
|
+
Are you sure your HTML tags are properly closed?
|
134
|
+
|
135
|
+
More functions will be available in the next version.
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
For any queries, just send an email to jazzezravi@gmail.com.
|
140
|
+
|
141
|
+
|
142
|
+
Thanks,
|
143
|
+
P.Raveendran
|
144
|
+
http://raveendran.wordpress.com
|
145
|
+
http://jazzez.wordpress.com
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
data/jazzez.rb
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'rubygems'
require 'uri'
|
2
|
+
|
3
|
+
# Small page-inspection helper built on top of the Mechanize gem.
#
# Public entry points:
# * links(url)        - every link found on the page at +url+
# * links_level2(url) - first-level links plus the links found on every
#                       first-level page that lives on the same host
# * tagdetails(url)   - occurrence counts for a fixed list of HTML tags
#
# The remaining public methods (check_http, create_agent, check_URL_length,
# levels, array_links, backup) are the building blocks used by those three
# and are kept public for backward compatibility.
class Jazzez
  # Scheme prepended to URLs that are given without one.
  DEFAULT_SCHEME = "http://"

  # Matches an explicit http:// or https:// prefix.
  SCHEME_PATTERN = /\Ahttps?:\/\//i

  INVALID_URL_MESSAGE = "The given URL is not a valid one.Please provide a valid URL"

  NO_LINKS_MESSAGE = "Oops. Something went wrong. Check the given URL have any links inside or not"

  LINKS_FETCH_FAILED_MESSAGE = "Oops. Something went wrong.\n" \
    "1. Check whether the given URL is valid or not.\n" \
    "2. Check your internet connection.\n" \
    "Try again now.."

  TAGS_FETCH_FAILED_MESSAGE = "oops. Something went wrong.\n" \
    "1. Check the given URL is valid or not.\n" \
    "2. Check your internet connection"

  # Tags reported by #tagdetails, in output order. "<l " deliberately keeps
  # its trailing space, exactly as in the original list.
  TAG_TERMS = [
    "<html", "</html>", "<head", "</head>", "<body", "</body>",
    "<table", "</table>", "<tr", "</tr>", "<td", "</td>", "<th", "</th>",
    "<l ", "</l>", "<link", "<p", "</p>", "<div", "</div>",
    "<span", "</span>", "<script", "</script>", "<ul", "</ul>",
    "<ol", "</ol>", "<a", "</a>",
    "<h1", "</h1>", "<h2", "</h2>", "<h3", "</h3>",
    "<h4", "</h4>", "<h5", "</h5>", "<h6", "</h6>",
    "<font", "</font>", "<select", "</select>", "<option", "</option>"
  ].freeze

  # Normalizes +url+ and remembers it for the following page fetch.
  #
  # Stores the full URL in @url (adding "http://" when no http/https scheme
  # is present) and the scheme + host in @homepage.
  #
  # Returns the homepage string, e.g. "http://google.com".
  # Raises RuntimeError when no host can be extracted from +url+.
  def check_http(url)
    @http = DEFAULT_SCHEME
    @url = url.to_s.strip
    # Test for a real scheme prefix; a bare "http" prefix check would treat
    # hosts such as "httpbin.org" as already having a scheme.
    @url = @http + @url unless @url =~ SCHEME_PATTERN
    host = host_of(@url)
    raise INVALID_URL_MESSAGE if host.nil? || host.empty?
    # Keep the scheme the URL actually uses so https sites stay https.
    @homepage = @url[SCHEME_PATTERN] + host
  end

  # Loads the Mechanize gem on first use and stores a fresh agent in @agent.
  #
  # Returns the new agent.
  def create_agent
    require 'mechanize'
    # NOTE(review): newer Mechanize releases appear to expose ::Mechanize while
    # older ones use WWW::Mechanize - confirm against the installed gem version.
    agent_class = if defined?(::Mechanize) && ::Mechanize.is_a?(Class)
                    ::Mechanize
                  else
                    ::WWW::Mechanize
                  end
    @agent = agent_class.new
  end

  # Raises RuntimeError when +url+ (after stripping whitespace) is shorter
  # than 4 characters; returns nil otherwise.
  def check_URL_length(url)
    raise INVALID_URL_MESSAGE if url.to_s.strip.length < 4
  end

  # Returns an array with every link found on the page at +url+; relative
  # links are resolved against that page's URL.
  #
  # Raises RuntimeError when the URL is too short, the page cannot be
  # fetched, or the page contains no usable link.
  def links(url)
    check_URL_length(url)
    check_http(url)
    create_agent
    @level0 = []
    page = fetch_page
    raise LINKS_FETCH_FAILED_MESSAGE if page.nil?
    return @level0 if page.links.nil?

    @level0 = collect_links(page)
    @empty = @level0.empty?
    raise NO_LINKS_MESSAGE if @empty
    @level0
  end

  # Non-raising variant of #links used for second-level crawling: a page
  # that cannot be fetched simply yields an empty array.
  def levels(url)
    check_http(url)
    create_agent
    @level0 = []
    page = fetch_page
    @level0 = collect_links(page) if page && page.links
    @level0
  end

  # Crawls every entry of +links+ that lives on the same host as the current
  # @url and stores one result array per crawled page in @final_output.
  #
  # Expects check_http (or links) to have been called first so @url is set.
  # Returns +links+.
  def array_links(links)
    @final_output = []
    @arraylinks = links
    # Capture the host once: #levels overwrites @url on every crawl.
    base_host = host_of(@url)
    @arraylinks.each do |link|
      next unless base_host && host_of(link) == base_host
      @final_output << levels(link)
    end
  end

  # Appends the current first-level result (@level0) to @level1_output.
  def backup
    @level1_output ||= []
    @level1_output << @level0
  end

  # Returns a sorted, duplicate-free array holding the links of the page at
  # +url+ plus the links of every same-host page it links to.
  #
  # Raises the same errors as #links for the first-level page.
  def links_level2(url)
    links(url)
    @level1_output = []
    backup
    array_links(@level0)

    @final_output = @final_output.flatten.uniq
    @level1_output << @final_output
    @level1_output = @level1_output.flatten.uniq
    @level1_output.sort
  end

  # Counts how often each entry of TAG_TERMS occurs in the body of the page
  # at +url+ (case-insensitively).
  #
  # Returns an array of strings of the form "<count><term> tag(s)", one per
  # entry of TAG_TERMS and in the same order.
  # Raises RuntimeError when the URL is too short or the page cannot be
  # fetched.
  def tagdetails(url)
    check_URL_length(url)
    check_http(url)
    create_agent
    page = fetch_page
    raise TAGS_FETCH_FAILED_MESSAGE if page.nil?

    # Scan the whole body at once; String#each is not available on current
    # Ruby versions and no search term contains a newline.
    source = page.body.to_s.downcase
    TAG_TERMS.map do |term|
      source.scan(tag_pattern(term)).length.to_s + term + " tag(s)"
    end
  end

  private

  # Host part of +url+ (third "/"-separated segment), or nil if absent.
  def host_of(url)
    url.to_s.split('/')[2]
  end

  # Fetches @url with the current agent; returns nil on any StandardError.
  def fetch_page
    @agent.get(@url)
  rescue StandardError
    nil
  end

  # Absolute hrefs of all links on +page+, skipping links that cannot be
  # resolved.
  def collect_links(page)
    found = []
    page.links.each do |link|
      href = absolute_href(link)
      found << href if href
    end
    found
  end

  # Absolute form of +link+'s href, or nil when it cannot be determined.
  def absolute_href(link)
    href = link.uri.to_s
    return href if href =~ SCHEME_PATTERN
    # Resolve against the fetched page so "about.html", "/docs/" and
    # "//host/path" all produce well-formed URLs.
    URI.join(@url, href).to_s
  rescue StandardError
    nil
  end

  # Regexp used to count +term+. Bare opening-tag prefixes such as "<p" must
  # be followed by whitespace, "/", ">" or end of input so that "<p" does not
  # also count "<pre>"; every other term is matched literally.
  def tag_pattern(term)
    escaped = Regexp.escape(term)
    return /#{escaped}(?=[\s\/>]|\z)/ if term =~ /\A<[a-z0-9]+\z/
    /#{escaped}/
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
4
|
+
name: jazzez
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.1.1
|
7
|
+
date: 2009-04-14 00:00:00 +05:30
|
8
|
+
summary: Get Links,level 2 links and Tag details from URL
|
9
|
+
require_paths:
|
10
|
+
- .
|
11
|
+
email: jazzezravi@gmail.com
|
12
|
+
homepage: http://jazzez.wordpress.com/
|
13
|
+
rubyforge_project: jazzez
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Jazzezravi
|
31
|
+
files:
|
32
|
+
- jazzez.rb
|
33
|
+
- doc/README.txt
|
34
|
+
- README
|
35
|
+
test_files: []
|
36
|
+
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README
|
41
|
+
- doc/README.txt
|
42
|
+
executables: []
|
43
|
+
|
44
|
+
extensions: []
|
45
|
+
|
46
|
+
requirements: []
|
47
|
+
|
48
|
+
dependencies:
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: mechanize
|
51
|
+
version_requirement:
|
52
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: 0.7.5
|
57
|
+
version:
|