omni_scrape 0.1.8 → 0.1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
 ---
-!binary "U0hBMQ==":
-  metadata.gz: !binary |-
-    NjNjMTQ5MmUyZWU0MzJmNDZlNTE1NDAwN2EzOTkwOTliZWZiYzdmMg==
-  data.tar.gz: !binary |-
-    MGUxYmIwMWMxM2FmMjkzY2JlZTg4OWZhNTU2Nzg4NDcyOGM4MTRmOQ==
+SHA1:
+  metadata.gz: bfdd836a78c51ee9764c7aec015ce8438b5691d0
+  data.tar.gz: 25d62caf3ecdf5fed189c8bc4b6608e818b9b42d
 SHA512:
-  metadata.gz: !binary |-
-    ZWJmZDEzMzBjYWY5ZTlkZTZlNGY3ZTFhZmI2Nzc0NjJiYzgzYzhmNDk3Nzhj
-    MzNmZGM1MTJkZGY2MjgyZWY4MWJjZTc5Y2Q0MjVkYzlmNThkMzU0OTExNjJi
-    OTA2MWIzOWU4MmIwYmVmZGY5MTNjZGE0NGMzOTlhZjRlOWU0YmY=
-  data.tar.gz: !binary |-
-    YWVkOTg2NDUzMTk3YmExNDQ0YTlkYjdlMWZiMmY0ZDA0OWRlNWY3ZDkyOWQ2
-    OTY0NWRmY2ZjNTg3NDliOTUxNzg4MzNiOTdjZDdiYTI4MzIyYWY0NzA3MmY0
-    ZDY2NmI1ZWFmZjkyYmVlNmZjOTVmZjMxMzE5NjFiZDlkOTI0OGY=
+  metadata.gz: 83c83e53cb5871c8409af8a5c74a67e303399099baf321bae646c002f8d42160ddc44616bb58b39d8a941d8cf7269f320e5cd107c7f6671eeb6e6a1f317796f5
+  data.tar.gz: 9791b0cf2a6cbfce254a4d60bf3c2c1295eb301c797272b68e1a40c307d0d3babd8e22c5331d90b747ec10bc315e30134542bcb5a660775aac122aa26fe1ab40
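The checksums now appear as plain hex digests instead of base64-wrapped YAML !binary nodes, so they can be compared directly against freshly computed values. A minimal verification sketch using Ruby's standard digest and YAML libraries (the file paths are assumptions, standing in for the metadata.gz and data.tar.gz members unpacked from the .gem archive):

  require 'digest'
  require 'yaml'

  # Assumed layout: checksums.yaml plus the two archive members
  # sit in the current directory, as unpacked from the .gem tar.
  expected = YAML.safe_load(File.read('checksums.yaml'))

  %w[metadata.gz data.tar.gz].each do |entry|
    actual = Digest::SHA512.file(entry).hexdigest
    puts "#{entry}: #{actual == expected['SHA512'][entry] ? 'OK' : 'MISMATCH'}"
  end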
data/lib/omni_scrape/version.rb CHANGED
@@ -1,3 +1,3 @@
 module OmniScrape
-  VERSION = "0.1.8"
+  VERSION = "0.1.8.1"
 end
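The only code change here is the version bump; once the new release is installed, it can be confirmed from the constant itself:

  require 'omni_scrape'

  puts OmniScrape::VERSION  # => "0.1.8.1"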
data/lib/omni_scrape.rb CHANGED
@@ -6,101 +6,71 @@ module OmniScrape
   def CrawlScrape(url, depth, sub_url)
     if (depth<0)
       depth=0
-    end
-    s_depth = depth
+    end#if
+    s_depth = depth #true
     #open the starting page
-    page = Nokogiri::HTML(open(url))
+    page = Nokogiri::HTML(open(url)) #good
     #collect all of the links from the page
-    links= page.css('a')
+    links= page.css('a') #good
 
     #initialize variables
     refarr=[]
-    titlearr=[]
-    titles =[]
     hrefs = []
-    x=0
     #add title and href to arrays for each link
     links.each do |link|
-      if(link['title']!=nil && link['title']!="" &&link['href']!=nil && link['href']!="")
-        # puts x
-        # puts (link['title'].split.join)
-        # x+=1
-        titles.push((link['title']).split.join)
-        hrefs.push((link['href']).split.join)
-
-      end
-
-    end
-    inc=0
+      if(link['href']!=nil && link['href']!="")
+        hrefs.push(link)
+      end#if
+    end#do
+
     #transfer links to other array
     while(!hrefs.empty?)
       value= hrefs.pop
-      puts value
+
       refarr.push(value)
-      refarr[inc]
-      inc+=1
-    end
-    inc=0
-    #transfer titles to other array
-    while(!titles.empty?)
-      value = titles.pop
-      puts value
-      titlearr.push(value)
-      puts titlearr[inc]
-      inc+=1
-    end
-    #setup for recognition of the end of the array
+
+
+    end#while
+    #setup for recognition of the end of the array
     refarr.push("-")
 
     #create folder for storing current set of scraped pages
-    g_depth = s_depth
-    while (g_depth>-1)
-      if (Dir.exist?('./pages'+g_depth.to_s))
-      else Dir.mkdir('./pages'+g_depth.to_s)
-      end
-      g_depth =g_depth-1
-    end
 
+    if (Dir.exist?('./results'+depth.to_s))
+    else Dir.mkdir('./results'+depth.to_s)
+    end#if
     #in each link
-    for i in 1..titlearr.length
-      if(refarr[i]!="-")
+    check =(refarr.length-1)
+    for i in 0..check
+      if(refarr[i]!="-")#still valid links
         #evaluate whether link is internal or external
-        if(refarr[i].include?('http://'))
-          url=refarr[i]
+        if(refarr[i]['href'].include?('http://') && refarr[i]!=nil)
+          url=refarr[i]['href']
         else
-          url=sub_url+refarr[i]
-        end
+          url=sub_url+refarr[i]['href']
+        end#if include?
         fourofour=false
 
         begin
           if(fourofour==false)
             pagina = Nokogiri::HTML(open(url))
-          end
+          end#if
         #test for a 404
         rescue Exception =>ex
-          puts "got a 404"
+
          fourofour=true
          retry
-        end
+        end#begin
         if (fourofour==false)
-          #trim it down and remove special characters
-          trimval=titlearr[i]
-          finval=trimval.gsub!(/[!:\/-]/, '')
-          puts titlearr[i]
-          if(finval==nil && titlearr[i]!=nil)
-            finval=titlearr[i]
-          end
-          puts finval
-          if(finval!=nil)
           #store html from the link with title of the link
-            crfile=File.new(('./results'+depth.to_s+"/"+finval+".html").chomp,"w")
+          crfile=File.new(('./results'+depth.to_s+"/page"+i.to_s+".html").chomp,"w")
           crfile.puts pagina
           crfile.close
-          end
-        end
-      end
+        end#if
+      end#if != "-"
+
     end#end for each
-    puts "finished"
+
   end#def crawlscrape
 
   #############################################################################################
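The net effect of this rewrite: CrawlScrape now keeps whole Nokogiri link nodes rather than parallel title/href arrays, so links without a title attribute are no longer dropped; debug output is removed; a single ./results<depth> directory replaces the per-depth ./pages<n> folders; and saved files are named page<i>.html instead of being derived from sanitized link titles. A minimal usage sketch, assuming the method is callable after mixing the module in at the top level (the URLs are placeholders):

  require 'omni_scrape'

  # CrawlScrape is defined as a plain method on the OmniScrape module,
  # so a top-level include makes it callable directly.
  include OmniScrape

  # Scrape http://example.com at depth 0, resolving relative hrefs
  # against the site root; pages land in ./results0/page<i>.html.
  CrawlScrape("http://example.com", 0, "http://example.com")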
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: omni_scrape
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.8.1
 platform: ruby
 authors:
 - Bradley Maynard
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-06-16 00:00:00.000000000 Z
+date: 2015-06-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -78,17 +78,17 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ! '>='
+  - - '>='
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.8
+rubygems_version: 2.0.14
 signing_key:
 specification_version: 4
 summary: This is an all-purpose web scraper
  summary: This is an all-purpose web scraper