RubyGems - wriggler - Versions diffs - 1.3.0 → 1.4.0 - Mend

wriggler 1.3.0 → 1.4.0

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 92e026f6d129cad6ba890e1c23e98ad8a48a2918
-  data.tar.gz: 5f759a905eec16613eec0cdcf8e1f0d8e19b4e1c
+  metadata.gz: fb5bcfb711baec8080be58ec329c95066d4cbee4
+  data.tar.gz: 3ff6eb28fd6f06f27398d48f4ada9d9808b73d86
 SHA512:
-  metadata.gz: ca9c5e3a15dc0b0422522b241d01b5b7ac10de666d114a8b87a5114fc4439328323f9a04628022e02d2376a40f83a98a3da20b9921425f1f49a39ce4bda431e0
-  data.tar.gz: 03d25bae580a894d6251b5cc3c19057971414715ca6f6a3e61561723e8242a1048d0ed81f3665f6892b5911ea5b49cab7374dbc7ba42deaa445519c0cd4007f5
+  metadata.gz: 1f5fab9e467d49fd8b4f5806381501dccc6c08866af5f756c976ab1c12f42421125c90e79e0999397058b2d4162386bdee03e9a5091ca5e9043e8e8c209489e4
+  data.tar.gz: 48f151ba4e2e2c42853f2fb06670ead00b7f44aa9ed660882c70ea02e6618862e2804408786eaa5197942c201610749ec5123a95d1a12727f6982e576f0a5822

data/lib/wriggler/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Wriggler
-  VERSION = "1.3.0"
+  VERSION = "1.4.0"
 end

data/lib/wriggler.rb CHANGED Viewed

@@ -3,80 +3,116 @@ require "nokogiri"
 require "find"
 module Wriggler
-	attr_reader :content, :directory
+  attr_reader :content, :directory
   def self.crawl(tags=[], directory="")
-    @content = Hash[tags.map {|k| [k, []]}]   #Hash with content
-    @directory = directory                    #Current top-level directory
+    @content    = Hash[tags.map {|k| [k, []]}]  #Hash with content
+    @directory  = directory                     #Current top-level directory
     navigate_directory
     @content
   end
+  private
   def self.navigate_directory
- 		#Set the cwd to the given dir send to gather all nested files from there
- 		Dir.chdir(@directory)
- 		gather_files
+    #Set the cwd to the given dir send to gather all nested files from there
+    Dir.chdir(@directory)
+    gather_files
   end
   def self.gather_files
-  	#Gathers all of the HTML or XML files from this and all subdirectories into an array
+    #Gathers all of the HTML or XML files from this and all subdirectories into an array
     Find.find(@directory) do |file|
-      if is_XML?(file) || is_HTML?(file)
+      if is_XML?(file) || is_HTML?(file) || is_TXT?(file)
         open_next_file(file)
       end
     end
   end
   def self.open_next_file(file)
-  	#Opens the next file on the list, depending on the extension passes it to HTML or XML
-  	f = File.open(file)
-  	if is_HTML?(file)
-  		set_HTML(f)
-  	elsif is_XML?(file)
-  		set_XML(f)
-  	end
+    #Opens the next file on the list, depending on the extension passes it to HTML or XML
+    f = File.open(file)
+    if is_HTML?(file)
+      set_HTML(f)
+    elsif is_XML?(file)
+      set_XML(f)
+    elsif is_TXT?(file)
+      set_TXT(f)
+    end
   end
   def self.is_HTML?(file)
-  	#Determines, using a regex check, if it is an HTML file
-  	file =~ /.html/
+    #Determines, using a regex check, if it is an HTML file
+    file =~ /.html/
   end
   def self.is_XML?(file)
-  	#Determines, using a regex check, if it is an XML file
-  	file =~ /.xml/
+    #Determines, using a regex check, if it is an XML file
+    file =~ /.xml/
+  end
+  def self.is_TXT?(file)
+    #Determines, using a regex check, if it is a TXT file
+    file =~ /.txt/
   end
   def self.set_HTML(file)
-  	#Set the HTML file into Nokogiri for crawling
-  	doc = Nokogiri::HTML(file)
-  	crawl_file(doc)
+    #Set the HTML file into Nokogiri for crawling
+    doc = Nokogiri::HTML(file)
+    crawl_file(doc)
   end
   def self.set_XML(file)
-  	#Set the XML file into Nokogiri for crawling
-  	doc = Nokogiri::XML(file)
-  	crawl_file(doc)
+    #Set the XML file into Nokogiri for crawling
+    doc = Nokogiri::XML(file)
+    crawl_file(doc)
+  end
+  def self.set_TXT(file)
+    #Set the TXT file into a readable String for Regex checking
+    doc = File.read(file)
+    txt_content(doc)
   end
   def self.crawl_file(doc)
     #Crawl the Nokogiri Object for the file
     @content.each_key do |key|
       arr = []
-      if !doc.xpath("//#{key}").empty?        #Returns an empty array if tag is not present
+      if !doc.xpath("//#{key}").empty?
         doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
+      elsif key == "html"
+        arr << "#{doc}"
       else
-        doc.xpath("//#{key}").map{ |_| arr << "" }
+        arr << ""
       end
       @content.fetch(key) << arr
     end
   end
-  def self.sanitize(text)
-  	#Removes any escaped quotes, replaces them
-  	text.gsub(/"/, "'").lstrip.chomp
+  def self.txt_content(doc)
+    #Now run through the raw text and regex out what is inbetween the tags
+    @content.each_key do |key|
+      arr = []
+      if key == "html"
+        arr << "#{doc}"
+      elsif contains_key(doc, key)
+        arr << doc.slice(/<#{key}>(.*)<\/#{key}>/).gsub(/<\/?\w+>/, "")
+      else
+        arr << ""
+      end
+      @content.fetch(key) << arr
+    end
   end
+  def self.contains_key(doc, key)
+    #Checks if the String contains the necessary tags
+    doc.include?("<#{key}>") && doc.include?("</#{key}>")
+  end
+  def self.sanitize(text)
+    #Removes any escaped quotes, replaces them
+    text.gsub(/"/, "'").lstrip.chomp
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wriggler
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.4.0
 platform: ruby
 authors:
 - Elliott Young
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-06-10 00:00:00.000000000 Z
+date: 2015-06-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler