jazzez 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/README +152 -0
  2. data/doc/README.txt +152 -0
  3. data/jazzez.rb +162 -0
  4. metadata +57 -0
data/README ADDED
@@ -0,0 +1,152 @@
1
+
2
+ Documentation for Jazzez Version 1.1.1 gem:
3
+
4
+
5
+ 1. Get the links from URL
6
+
7
+ Ex.
8
+
9
+ require 'jazzez'
10
+ output= Jazzez.new
11
+ puts output.links("google.com")
12
+
13
+
14
+ Output:
15
+
16
+ http://images.google.com/imghp?hl=en&tab=wi
17
+ http://maps.google.com/maps?hl=en&tab=wl
18
+ http://news.google.com/nwshp?hl=en&tab=wn
19
+ http://video.google.com/?hl=en&tab=wv
20
+ http://mail.google.com/mail/?hl=en&tab=wm
21
+ http://www.google.com/intl/en/options/
22
+ https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
23
+ http://google.com/advanced_search?hl=en
24
+ http://google.com/preferences?hl=en
25
+ http://google.com/language_tools?hl=en
26
+ http://google.com/intl/en/ads/
27
+ http://google.com/services/
28
+ http://google.com/intl/en/about.html
29
+ http://www.google.com/ncr
30
+ http://google.com/intl/en/privacy.html
31
+
32
+ Usage:
33
+
34
+ 1. Get the URL from User.
35
+ 2. Make sure to check whether it is valid or not.
36
+ 3. If it is valid, then get the source code for that page with the help of Mechanize gem.
37
+ 4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
38
+ 5. If an href value does not include a domain, prefix it with the homepage URL (homepage + href value).
39
+ 6. return the results to User as an array
40
+
41
+
42
+
43
+
44
+ 2. Get the Second level links
45
+
46
+
47
+ Ex.
48
+
49
+ require 'jazzez'
50
+ output= Jazzez.new
51
+ puts output.links_level2("google.com")
52
+
53
+
54
+ Output:
55
+
56
+ It gives the Second level outputs.
57
+
58
+ If you want to see the output of this code then just go to http://jazzez.wordpress.com
59
+
60
+
61
+
62
+ 3. Get the Html tags
63
+
64
+
65
+ Ex.
66
+
67
+ require 'jazzez'
68
+ output= Jazzez.new
69
+ puts output.tagdetails("google.com")
70
+
71
+
72
+ Output:
73
+
74
+ 1<html tag(s)
75
+ 1</html> tag(s)
76
+ 1<head tag(s)
77
+ 1</head> tag(s)
78
+ 1<body tag(s)
79
+ 1</body> tag(s)
80
+ 2<table tag(s)
81
+ 2</table> tag(s)
82
+ 3<tr tag(s)
83
+ 3</tr> tag(s)
84
+ 9<td tag(s)
85
+ 9</td> tag(s)
86
+ 0<th tag(s)
87
+ 0</th> tag(s)
88
+ 0<l tag(s)
89
+ 0</l> tag(s)
90
+ 0<link tag(s)
91
+ 1<p tag(s)
92
+ 1</p> tag(s)
93
+ 4<div tag(s)
94
+ 4</div> tag(s)
95
+ 0<span tag(s)
96
+ 0</span> tag(s)
97
+ 4<script tag(s)
98
+ 4</script> tag(s)
99
+ 0<ul tag(s)
100
+ 0</ul> tag(s)
101
+ 0<ol tag(s)
102
+ 0</ol> tag(s)
103
+ 16<a tag(s)
104
+ 15</a> tag(s)
105
+ 0<h1 tag(s)
106
+ 0</h1> tag(s)
107
+ 0<h2 tag(s)
108
+ 0</h2> tag(s)
109
+ 0<h3 tag(s)
110
+ 0</h3> tag(s)
111
+ 0<h4 tag(s)
112
+ 0</h4> tag(s)
113
+ 0<h5 tag(s)
114
+ 0</h5> tag(s)
115
+ 0<h6 tag(s)
116
+ 0</h6> tag(s)
117
+ 4<font tag(s)
118
+ 4</font> tag(s)
119
+ 0<select tag(s)
120
+ 0</select> tag(s)
121
+ 0<option tag(s)
122
+ 0</option> tag(s)
123
+
124
+
125
+
126
+ Usage:
127
+
128
+ Easy to answer the below questions
129
+
130
+ How many tables in your code ?
131
+ How many table rows/columns in your code ?
132
+ How Many div tags opened and how many div tags closed ?
133
+ Are you sure your html tags were properly closed ?
134
+
135
+ More functions available in next version.
136
+
137
+
138
+
139
+ Any queries just send a mail to jazzezravi@gmail.com.
140
+
141
+
142
+ Thanks,
143
+ P.Raveendran
144
+ http://raveendran.wordpress.com
145
+ http://jazzez.wordpress.com
146
+
147
+
148
+
149
+
150
+
151
+
152
+
@@ -0,0 +1,152 @@
1
+
2
+ Documentation for Jazzez Version 1.1.1 gem:
3
+
4
+
5
+ 1. Get the links from URL
6
+
7
+ Ex.
8
+
9
+ require 'jazzez'
10
+ output= Jazzez.new
11
+ puts output.links("google.com")
12
+
13
+
14
+ Output:
15
+
16
+ http://images.google.com/imghp?hl=en&tab=wi
17
+ http://maps.google.com/maps?hl=en&tab=wl
18
+ http://news.google.com/nwshp?hl=en&tab=wn
19
+ http://video.google.com/?hl=en&tab=wv
20
+ http://mail.google.com/mail/?hl=en&tab=wm
21
+ http://www.google.com/intl/en/options/
22
+ https://www.google.com/accounts/Login?continue=http://66.249.89.44/&hl=en
23
+ http://google.com/advanced_search?hl=en
24
+ http://google.com/preferences?hl=en
25
+ http://google.com/language_tools?hl=en
26
+ http://google.com/intl/en/ads/
27
+ http://google.com/services/
28
+ http://google.com/intl/en/about.html
29
+ http://www.google.com/ncr
30
+ http://google.com/intl/en/privacy.html
31
+
32
+ Usage:
33
+
34
+ 1. Get the URL from User.
35
+ 2. Make sure to check whether it is valid or not.
36
+ 3. If it is valid, then get the source code for that page with the help of Mechanize gem.
37
+ 4. Get all the <a> tags & collect only HREF Values in that page with the help of Mechanize gem
38
+ 5. If an href value does not include a domain, prefix it with the homepage URL (homepage + href value).
39
+ 6. return the results to User as an array
40
+
41
+
42
+
43
+
44
+ 2. Get the Second level links
45
+
46
+
47
+ Ex.
48
+
49
+ require 'jazzez'
50
+ output= Jazzez.new
51
+ puts output.links_level2("google.com")
52
+
53
+
54
+ Output:
55
+
56
+ It gives the Second level outputs.
57
+
58
+ If you want to see the output of this code then just go to http://jazzez.wordpress.com
59
+
60
+
61
+
62
+ 3. Get the Html tags
63
+
64
+
65
+ Ex.
66
+
67
+ require 'jazzez'
68
+ output= Jazzez.new
69
+ puts output.tagdetails("google.com")
70
+
71
+
72
+ Output:
73
+
74
+ 1<html tag(s)
75
+ 1</html> tag(s)
76
+ 1<head tag(s)
77
+ 1</head> tag(s)
78
+ 1<body tag(s)
79
+ 1</body> tag(s)
80
+ 2<table tag(s)
81
+ 2</table> tag(s)
82
+ 3<tr tag(s)
83
+ 3</tr> tag(s)
84
+ 9<td tag(s)
85
+ 9</td> tag(s)
86
+ 0<th tag(s)
87
+ 0</th> tag(s)
88
+ 0<l tag(s)
89
+ 0</l> tag(s)
90
+ 0<link tag(s)
91
+ 1<p tag(s)
92
+ 1</p> tag(s)
93
+ 4<div tag(s)
94
+ 4</div> tag(s)
95
+ 0<span tag(s)
96
+ 0</span> tag(s)
97
+ 4<script tag(s)
98
+ 4</script> tag(s)
99
+ 0<ul tag(s)
100
+ 0</ul> tag(s)
101
+ 0<ol tag(s)
102
+ 0</ol> tag(s)
103
+ 16<a tag(s)
104
+ 15</a> tag(s)
105
+ 0<h1 tag(s)
106
+ 0</h1> tag(s)
107
+ 0<h2 tag(s)
108
+ 0</h2> tag(s)
109
+ 0<h3 tag(s)
110
+ 0</h3> tag(s)
111
+ 0<h4 tag(s)
112
+ 0</h4> tag(s)
113
+ 0<h5 tag(s)
114
+ 0</h5> tag(s)
115
+ 0<h6 tag(s)
116
+ 0</h6> tag(s)
117
+ 4<font tag(s)
118
+ 4</font> tag(s)
119
+ 0<select tag(s)
120
+ 0</select> tag(s)
121
+ 0<option tag(s)
122
+ 0</option> tag(s)
123
+
124
+
125
+
126
+ Usage:
127
+
128
+ Easy to answer the below questions
129
+
130
+ How many tables in your code ?
131
+ How many table rows/columns in your code ?
132
+ How Many div tags opened and how many div tags closed ?
133
+ Are you sure your html tags were properly closed ?
134
+
135
+ More functions available in next version.
136
+
137
+
138
+
139
+ Any queries just send a mail to jazzezravi@gmail.com.
140
+
141
+
142
+ Thanks,
143
+ P.Raveendran
144
+ http://raveendran.wordpress.com
145
+ http://jazzez.wordpress.com
146
+
147
+
148
+
149
+
150
+
151
+
152
+
@@ -0,0 +1,162 @@
1
+ require 'rubygems'
2
+
3
# Jazzez collects the links found on a web page (optionally following
# same-host links one level deeper) and counts common HTML tags in a page.
# Pages are downloaded with the Mechanize gem, which is loaded lazily by
# #create_agent so the class can be required without it.
class Jazzez
  # Scheme prepended to URLs that are given without one.
  DEFAULT_SCHEME = "http://"

  # Matches a leading "http://" or "https://" in any letter case.
  HTTP_SCHEME = %r{\Ahttps?://}i

  # Matches any leading URI scheme ("https:", "mailto:", "javascript:", ...).
  ANY_SCHEME = /\A[a-z][a-z0-9+.\-]*:/i

  # Tag tokens reported by #tagdetails, in output order.
  TAG_TOKENS = [
    "<html", "</html>", "<head", "</head>", "<body", "</body>",
    "<table", "</table>", "<tr", "</tr>", "<td", "</td>", "<th", "</th>",
    "<l ", "</l>", "<link", "<p", "</p>", "<div", "</div>",
    "<span", "</span>", "<script", "</script>", "<ul", "</ul>",
    "<ol", "</ol>", "<a", "</a>", "<h1", "</h1>", "<h2", "</h2>",
    "<h3", "</h3>", "<h4", "</h4>", "<h5", "</h5>", "<h6", "</h6>",
    "<font", "</font>", "<select", "</select>", "<option", "</option>"
  ].freeze

  # Normalizes +url+ and remembers it in @url; derives @homepage
  # (scheme + host) from it.
  #
  # "http://" is prepended only when the URL does not already start with
  # http:// or https://, so hosts such as "httpbin.org" are handled and an
  # https page keeps an https homepage.
  #
  # Returns the homepage string.
  def check_http(url)
    @url = url.to_s.strip
    @url = DEFAULT_SCHEME + @url unless @url =~ HTTP_SCHEME
    # "http://host/path".split('/') => ["http:", "", "host", "path"]
    scheme, _empty, host = @url.split('/')
    @homepage = "#{scheme}//#{host}"
  end

  # Loads Mechanize and stores a fresh agent in @agent.
  #
  # Mechanize releases before 1.0 expose the class as WWW::Mechanize, later
  # ones as Mechanize; whichever is available is used.
  #
  # Returns the new agent.
  def create_agent
    require 'mechanize'
    agent_class = defined?(::Mechanize) ? ::Mechanize : ::WWW::Mechanize
    @agent = agent_class.new
  end

  # Raises RuntimeError when +url+ (ignoring surrounding whitespace) is
  # shorter than 4 characters; returns nil otherwise.
  def check_URL_length(url)
    raise "The given URL is not a valid one.Please provide a valid URL" if url.to_s.strip.length < 4
  end

  # Returns an array with every link found on the page at +url+.
  # Hrefs without a domain are prefixed with the homepage.
  #
  # Raises RuntimeError when the URL is too short, when the page cannot be
  # fetched, or when the page contains no usable links.
  def links(url)
    check_URL_length(url)
    check_http(url)
    create_agent
    @level0 = []

    page = fetch_page
    if page.nil?
      raise [
        "Oops. Something went wrong.",
        "1. Check whether the given URL is valid or not.",
        "2. Check your internet connection.",
        "Try again now.."
      ].join("\n")
    end

    found = page.links
    unless found.nil?
      @level0 = collect_links(found)
      raise "Oops. Something went wrong. Check the given URL have any links inside or not" if @level0.empty?
    end
    @level0
  end

  # Best-effort variant of #links used for second-level crawling: never
  # raises for an unreachable page or a page without links, it simply
  # returns an empty array in those cases.
  def levels(url)
    check_http(url)
    create_agent
    @level0 = []

    page = fetch_page
    found = page && page.links
    @level0 = collect_links(found) if found
    @level0
  end

  # Visits every entry of +links+ that lives on the same host as the
  # current @url and stores one result array per visited link in
  # @final_output. Links on other hosts are skipped.
  def array_links(links)
    host = @url.to_s.split('/')[2]
    @final_output = []
    links.each do |link|
      next unless link.split('/')[2] == host
      @final_output << levels(link)
    end
  end

  # Appends the current first-level result (@level0) to @level1_output.
  def backup
    (@level1_output ||= []) << @level0
  end

  # Returns the sorted, de-duplicated union of the links on +url+ and the
  # links on every same-host page it links to.
  #
  # Raises the same errors as #links for the first-level page.
  def links_level2(url)
    links(url)
    @level1_output = []
    backup
    array_links(@level0)

    @final_output = @final_output.flatten.uniq
    @level1_output << @final_output
    @level1_output = @level1_output.flatten.uniq
    @level1_output.sort
  end

  # Returns an array of strings such as "4<div tag(s)", one per entry of
  # TAG_TOKENS, giving how often each tag occurs in the page at +url+.
  #
  # Raises RuntimeError when the URL is too short or the page cannot be
  # fetched.
  def tagdetails(url)
    check_URL_length(url)
    check_http(url)
    create_agent

    page = fetch_page
    if page.nil?
      raise [
        "oops. Something went wrong.",
        "1. Check the given URL is valid or not.",
        "2. Check your internet connection"
      ].join("\n")
    end
    count_tags(page.body)
  end

  private

  # Fetches @url with @agent; returns nil instead of raising when the
  # request fails.
  def fetch_page
    @agent.get(@url)
  rescue StandardError
    nil
  end

  # Converts link objects (anything responding to #uri) into absolute URL
  # strings. Links whose uri cannot be read are skipped.
  def collect_links(page_links)
    hrefs = []
    page_links.each do |link|
      begin
        href = link.uri.to_s
      rescue StandardError
        next
      end
      hrefs << absolute_href(href)
    end
    hrefs
  end

  # Makes +href+ absolute relative to @homepage (not to the page's own
  # directory, matching the documented "homepage + href" behaviour).
  def absolute_href(href)
    # Already carries a scheme (http:, https:, mailto:, ...): keep as is.
    return href if href =~ ANY_SCHEME
    # Protocol-relative "//host/path": reuse the homepage's scheme.
    return @homepage.split('/')[0] + href if href[0, 2] == "//"
    return @homepage + href if href.empty? || href[0, 1] == "/"
    "#{@homepage}/#{href}"
  end

  # Counts every TAG_TOKENS entry in +source+ (case-insensitively) and
  # returns the formatted result lines.
  def count_tags(source)
    text = source.to_s.downcase
    TAG_TOKENS.map do |token|
      "#{text.scan(tag_pattern(token)).length}#{token} tag(s)"
    end
  end

  # Builds the regexp used to count +token+. A bare opening token such as
  # "<a" must be followed by whitespace, "/" or ">" so that "<abbr" or
  # "<address" are not counted as "<a"; every other token is matched
  # literally.
  def tag_pattern(token)
    escaped = Regexp.escape(token)
    if token =~ /\A<[a-z0-9]+\z/
      /#{escaped}(?=[\s\/>])/
    else
      /#{escaped}/
    end
  end
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: jazzez
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.1.1
7
+ date: 2009-04-14 00:00:00 +05:30
8
+ summary: Get Links,level 2 links and Tag details from URL
9
+ require_paths:
10
+ - .
11
+ email: jazzezravi@gmail.com
12
+ homepage: http://jazzez.wordpress.com/
13
+ rubyforge_project: jazzez
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Jazzezravi
31
+ files:
32
+ - jazzez.rb
33
+ - doc/README.txt
34
+ - README
35
+ test_files: []
36
+
37
+ rdoc_options: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - doc/README.txt
42
+ executables: []
43
+
44
+ extensions: []
45
+
46
+ requirements: []
47
+
48
+ dependencies:
49
+ - !ruby/object:Gem::Dependency
50
+ name: mechanize
51
+ version_requirement:
52
+ version_requirements: !ruby/object:Gem::Version::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.7.5
57
+ version: