RubyGems - wriggler - Versions diffs - 1.3.0 → 1.4.0 - Mend

wriggler 1.3.0 → 1.4.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 92e026f6d129cad6ba890e1c23e98ad8a48a2918
-  data.tar.gz: 5f759a905eec16613eec0cdcf8e1f0d8e19b4e1c
+  metadata.gz: fb5bcfb711baec8080be58ec329c95066d4cbee4
+  data.tar.gz: 3ff6eb28fd6f06f27398d48f4ada9d9808b73d86
 SHA512:
-  metadata.gz: ca9c5e3a15dc0b0422522b241d01b5b7ac10de666d114a8b87a5114fc4439328323f9a04628022e02d2376a40f83a98a3da20b9921425f1f49a39ce4bda431e0
-  data.tar.gz: 03d25bae580a894d6251b5cc3c19057971414715ca6f6a3e61561723e8242a1048d0ed81f3665f6892b5911ea5b49cab7374dbc7ba42deaa445519c0cd4007f5
+  metadata.gz: 1f5fab9e467d49fd8b4f5806381501dccc6c08866af5f756c976ab1c12f42421125c90e79e0999397058b2d4162386bdee03e9a5091ca5e9043e8e8c209489e4
+  data.tar.gz: 48f151ba4e2e2c42853f2fb06670ead00b7f44aa9ed660882c70ea02e6618862e2804408786eaa5197942c201610749ec5123a95d1a12727f6982e576f0a5822

data/lib/wriggler/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Wriggler
-  VERSION = "1.3.0"
+  VERSION = "1.4.0"
 end

data/lib/wriggler.rb CHANGED Viewed

@@ -3,80 +3,116 @@ require "nokogiri"
 require "find"
 module Wriggler
-	attr_reader :content, :directory
+  attr_reader :content, :directory
   def self.crawl(tags=[], directory="")
-    @content = Hash[tags.map {|k| [k, []]}]   #Hash with content
-    @directory = directory                    #Current top-level directory
+    @content    = Hash[tags.map {|k| [k, []]}]  #Hash with content
+    @directory  = directory                     #Current top-level directory
     navigate_directory
     @content
   end
+  private
   def self.navigate_directory
- 		#Set the cwd to the given dir send to gather all nested files from there
- 		Dir.chdir(@directory)
- 		gather_files
+    #Set the cwd to the given dir send to gather all nested files from there
+    Dir.chdir(@directory)
+    gather_files
   end
   def self.gather_files
-  	#Gathers all of the HTML or XML files from this and all subdirectories into an array
+    #Gathers all of the HTML or XML files from this and all subdirectories into an array
     Find.find(@directory) do |file|
-      if is_XML?(file) || is_HTML?(file)
+      if is_XML?(file) || is_HTML?(file) || is_TXT?(file)
         open_next_file(file)
       end
     end
   end
   def self.open_next_file(file)
-  	#Opens the next file on the list, depending on the extension passes it to HTML or XML
-  	f = File.open(file)
-  	if is_HTML?(file)
-  		set_HTML(f)
-  	elsif is_XML?(file)
-  		set_XML(f)
-  	end
+    #Opens the next file on the list, depending on the extension passes it to HTML or XML
+    f = File.open(file)
+    if is_HTML?(file)
+      set_HTML(f)
+    elsif is_XML?(file)
+      set_XML(f)
+    elsif is_TXT?(file)
+      set_TXT(f)
+    end
   end
   def self.is_HTML?(file)
-  	#Determines, using a regex check, if it is an HTML file
-  	file =~ /.html/
+    #Determines, using a regex check, if it is an HTML file
+    file =~ /.html/
   end
   def self.is_XML?(file)
-  	#Determines, using a regex check, if it is an XML file
-  	file =~ /.xml/
+    #Determines, using a regex check, if it is an XML file
+    file =~ /.xml/
+  end
+  def self.is_TXT?(file)
+    #Determines, using a regex check, if it is a TXT file
+    file =~ /.txt/
   end
   def self.set_HTML(file)
-  	#Set the HTML file into Nokogiri for crawling
-  	doc = Nokogiri::HTML(file)
-  	crawl_file(doc)
+    #Set the HTML file into Nokogiri for crawling
+    doc = Nokogiri::HTML(file)
+    crawl_file(doc)
   end
   def self.set_XML(file)
-  	#Set the XML file into Nokogiri for crawling
-  	doc = Nokogiri::XML(file)
-  	crawl_file(doc)
+    #Set the XML file into Nokogiri for crawling
+    doc = Nokogiri::XML(file)
+    crawl_file(doc)
+  end
+  def self.set_TXT(file)
+    #Set the TXT file into a readable String for Regex checking
+    doc = File.read(file)
+    txt_content(doc)
   end
   def self.crawl_file(doc)
     #Crawl the Nokogiri Object for the file
     @content.each_key do |key|
       arr = []
-      if !doc.xpath("//#{key}").empty?        #Returns an empty array if tag is not present
+      if !doc.xpath("//#{key}").empty?
         doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
+      elsif key == "html"
+        arr << "#{doc}"
       else
-        doc.xpath("//#{key}").map{ |_| arr << "" }
+        arr << ""
       end
       @content.fetch(key) << arr
     end
   end
-  def self.sanitize(text)
-  	#Removes any escaped quotes, replaces them
-  	text.gsub(/"/, "'").lstrip.chomp
+  def self.txt_content(doc)
+    #Now run through the raw text and regex out what is inbetween the tags
+    @content.each_key do |key|
+      arr = []
+      if key == "html"
+        arr << "#{doc}"
+      elsif contains_key(doc, key)
+        arr << doc.slice(/<#{key}>(.*)<\/#{key}>/).gsub(/<\/?\w+>/, "")
+      else
+        arr << ""
+      end
+      @content.fetch(key) << arr
+    end
   end
+  def self.contains_key(doc, key)
+    #Checks if the String contains the necessary tags
+    doc.include?("<#{key}>") && doc.include?("</#{key}>")
+  end
+  def self.sanitize(text)
+    #Removes any escaped quotes, replaces them
+    text.gsub(/"/, "'").lstrip.chomp
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wriggler
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.4.0
 platform: ruby
 authors:
 - Elliott Young
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-06-10 00:00:00.000000000 Z
+date: 2015-06-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler