wriggler 0.1.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d35c4b6d7c7da4483e293b47fdbcc06a9348172
4
- data.tar.gz: 9a7372283313d57c7ab1d3d39320c544422d5b2c
3
+ metadata.gz: 1d741f12caa9d0cae037e2689e76ff0aa6adee13
4
+ data.tar.gz: d84a237b2f9c49ef76df0a1fa7db330d32d031ea
5
5
  SHA512:
6
- metadata.gz: 0cb56c3f6062e77532fe7676acb7b68140a8b2fe4b9d3b21015b77b159fbc259b295aee19082210444804c632bdc647247f7314fdb82908ea604582b82908790
7
- data.tar.gz: f8c9cbd5a96acbb701a9ecd0e5fc50c34e5a0b6ad88f56464ea6c969a2d0fa314f8ffc34dece4f71df8598571cce1db1423c1fa7b4b109f6283f71c93a522a0e
6
+ metadata.gz: 7fd896183312ea3a2f9ab5e00836dfaf12f7af1486796deedd2f80f1c3f851ae437d29d8c48566a8f89c8ff04148b51c797ad82a8518fd65fb025178f75c80c0
7
+ data.tar.gz: 0aa7b338518a6285e5bb3aac82ff401b0157a3b72f805b49f2b066851e1aa81f22a7f03924db2625ea50eece17c6492942424664e380fa2c50ab89d5dd0f0b8d
data/dirtest/nested_fldr/test5.xml ADDED
@@ -0,0 +1 @@
1
+ <test>If this appears it works</test>
data/dirtest/test1.xml CHANGED
@@ -1,6 +1,7 @@
1
1
  <root>
2
2
  <sitcoms>
3
3
  <sitcom>
4
+ <test>This is different</test>
4
5
  <name>Married with Children</name>
5
6
  <characters>
6
7
  <character>Al Bundy</character>
data/dirtest/test4.html ADDED
@@ -0,0 +1,7 @@
1
+ <div id = "buttons">
2
+ <button id="bye">Bye</button>
3
+ <button id="hello">Hello</button>
4
+ </div>
5
+ <div>
6
+ <p>1: <span id="greeting">Greeting</span></p>
7
+ </div>
data/lib/wriggler/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Wriggler
2
- VERSION = "0.1.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/wriggler.rb CHANGED
@@ -1,32 +1,43 @@
1
1
  require "wriggler/version"
2
2
  require "nokogiri"
3
+ require "find"
3
4
 
4
5
  module Wriggler
5
6
  attr_reader :content, :directory
6
7
 
7
- def crawl(tags=[], directory="", subdirectories=true)
8
- @content = Hash[tags.map {|k| [k, []]}] #Hash with content
9
- @subdirectories = subdirectories #Default true for the existence of subdirs
10
- @directory = directory #Directory to grab files from
8
+ def self.crawl(tags=[], directory="")
9
+ @content = Hash[tags.map {|k| [k, []]}] #Hash with content
10
+ @directory = directory #Current top-level directory
11
11
 
12
- navigate_directory
13
- Writer.write_to_csv(@content)
14
- end
12
+ navigate_directory
13
+ Writer.write(@content)
14
+ end
15
15
 
16
16
  private
17
17
 
18
- def navigate_directory
18
+ def self.navigate_directory
19
19
  #Set the cwd to the given dir send to gather all nested files from there
20
20
  Dir.chdir(@directory)
21
- gather_files
21
+ open_files(gather_files)
22
22
  end
23
23
 
24
- def gather_files
25
- #Gathers all of the HTML or XML files from this and all subdirectories
24
+ def self.gather_files
25
+ #Gathers all of the HTML or XML files from this and all subdirectories into an array
26
+ file_array = []
27
+ Find.find(@directory) do |file|
28
+ file_array << file if file.match(/\.xml\Z/) || file.match(/\.html\Z/)
29
+ end
30
+ file_array
31
+ end
26
32
 
33
+ def self.open_files(file_array)
34
+ #Opens all the files in the file_array
35
+ file_array.each do |file|
36
+ open_next_file(file)
37
+ end
27
38
  end
28
39
 
29
- def open_next_file(file)
40
+ def self.open_next_file(file)
30
41
  #Opens the next file on the list, depending on the extension passes it to HTML or XML
31
42
  f = File.open(file)
32
43
 
@@ -37,44 +48,52 @@ module Wriggler
37
48
  end
38
49
  end
39
50
 
40
- def is_HTML?(file)
51
+ def self.is_HTML?(file)
41
52
  #Determines, using a regex check, if it is an HTML file
42
53
  file =~ /.html/
43
54
  end
44
55
 
45
- def is_XML?(file)
56
+ def self.is_XML?(file)
46
57
  #Determines, using a regex check, if it is an XML file
47
58
  file =~ /.xml/
48
59
  end
49
60
 
50
- def set_HTML(file)
61
+ def self.set_HTML(file)
51
62
  #Set the HTML file into Nokogiri for crawling
52
63
  doc = Nokogiri::HTML(file)
53
64
  crawl_file(doc)
54
65
  end
55
66
 
56
- def set_XML(file)
67
+ def self.set_XML(file)
57
68
  #Set the XML file into Nokogiri for crawling
58
69
  doc = Nokogiri::XML(file)
59
70
  crawl_file(doc)
60
71
  end
61
72
 
62
- def crawl_file(doc)
73
+ def self.crawl_file(doc)
63
74
  #Crawl the Nokogiri Object for the file
64
75
  @content.each_key do |key|
76
+ arr = []
65
77
  if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
66
- doc.xpath("//#{key}").map{ |tag| @content.fetch(key) << sanitize(tag.text) }
78
+ doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
67
79
  end
80
+ fill_content(arr, key)
68
81
  end
69
82
  end
70
83
 
71
- def sanitize(text)
84
+ def self.sanitize(text)
72
85
  #Removes any escaped quotes, replaces them
73
- text.gsub(/"/, "'")
86
+ text.gsub(/"/, "'").lstrip.chomp
87
+ end
88
+
89
+ def self.fill_content(arr, key)
90
+ #Doesn't shovel if there is no content found for the specific tag
91
+ !arr.empty? ? (@content.fetch(key) << arr) : nil
74
92
  end
75
93
  end
76
94
 
77
95
  module Writer
78
- def write_to_csv(content)
96
+ def write(content)
97
+ @content = content
79
98
  end
80
99
  end
data/test.rb CHANGED
@@ -1,91 +1,93 @@
1
1
  require "nokogiri"
2
+ require "find"
2
3
 
3
4
  class Wriggler
4
- def initialize(tags=[], directory="", subdirectories=true)
5
- @content = Hash[tags.map {|k| [k, []]}] #Hash with content
6
- @subdirectories = subdirectories #Default true for the existence of subdirs
7
- @directory = directory #Directory to grab files from
5
+ def initialize(tags=[], directory="", subdirectories=true)
6
+ @content = Hash[tags.map {|k| [k, []]}] #Hash with content
7
+ @directory = directory #Current top-level directory
8
+ @subdirectories = subdirectories #Default true for the existence of subdirs
8
9
 
9
- navigate_directory
10
- # Writer.write_to_csv(@content)
11
- end
10
+ navigate_directory
11
+ p @content
12
+ # Writer.write(@content)
13
+ end
12
14
 
13
15
  private
14
16
 
15
17
  def navigate_directory
16
- #Set the cwd to the given dir send to gather all nested files from there
17
- Dir.chdir(@directory)
18
- gather_files
18
+ #Set the cwd to the given dir send to gather all nested files from there
19
+ Dir.chdir(@directory)
20
+ open_files(gather_files)
19
21
  end
20
22
 
21
23
  def gather_files
22
- #Gathers all of the HTML or XML files from this and all subdirectories
23
- open_next_file("test1.xml")
24
- puts "=============="
25
- puts "1:"
26
- p @content
27
- puts "=============="
28
- puts ""
29
- open_next_file("test2.xml")
30
- puts "=============="
31
- puts "2:"
32
- p @content
33
- puts "=============="
34
- puts ""
35
- open_next_file("test3.xml")
36
- puts "=============="
37
- puts "3:"
38
- p @content
39
- puts "=============="
40
- puts ""
24
+ #Gathers all of the HTML or XML files from this and all subdirectories into an array
25
+ file_array = []
26
+ Find.find(@directory) do |f|
27
+ file_array << f if f.match(/\.xml\Z/) || f.match(/\.html\Z/)
28
+ end
29
+ file_array
30
+ end
31
+
32
+ def open_files(file_array)
33
+ file_array.each do |file|
34
+ open_next_file(file)
35
+ end
41
36
  end
42
37
 
43
38
  def open_next_file(file)
44
- #Opens the next file on the list, depending on the extension passes it to HTML or XML
45
- f = File.open(file)
46
-
47
- if is_html?(file)
48
- set_HTML(f)
49
- elsif is_xml?(file)
50
- set_XML(f)
51
- end
39
+ #Opens the next file on the list, depending on the extension passes it to HTML or XML
40
+ f = File.open(file)
41
+
42
+ if is_HTML?(file)
43
+ set_HTML(f)
44
+ elsif is_XML?(file)
45
+ set_XML(f)
46
+ end
52
47
  end
53
48
 
54
- def is_html?(file)
55
- #Determines, using a regex check, if it is an HTML file
56
- file =~ /.html/
49
+ def is_HTML?(file)
50
+ #Determines, using a regex check, if it is an HTML file
51
+ file =~ /.html/
57
52
  end
58
53
 
59
- def is_xml?(file)
60
- #Determines, using a regex check, if it is an XML file
61
- file =~ /.xml/
54
+ def is_XML?(file)
55
+ #Determines, using a regex check, if it is an XML file
56
+ file =~ /.xml/
62
57
  end
63
58
 
64
59
  def set_HTML(file)
65
- #Set the HTML file into Nokogiri for crawling
66
- doc = Nokogiri::HTML(file)
67
- crawl_file(doc)
60
+ #Set the HTML file into Nokogiri for crawling
61
+ doc = Nokogiri::HTML(file)
62
+ crawl_file(doc)
68
63
  end
69
64
 
70
65
  def set_XML(file)
71
- #Set the XML file into Nokogiri for crawling
72
- doc = Nokogiri::XML(file)
73
- crawl_file(doc)
66
+ #Set the XML file into Nokogiri for crawling
67
+ doc = Nokogiri::XML(file)
68
+ crawl_file(doc)
74
69
  end
75
70
 
76
71
  def crawl_file(doc)
77
- #Crawl the Nokogiri Object for the file
78
- @content.each_key do |key|
79
- if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
80
- doc.xpath("//#{key}").map{ |tag| @content.fetch(key) << sanitize(tag.text) }
81
- end
82
- end
72
+ #Crawl the Nokogiri Object for the file
73
+ @content.each_key do |key|
74
+ arr = []
75
+ if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
76
+ doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
77
+ end
78
+ fill_content(arr, key)
79
+ end
83
80
  end
84
81
 
85
82
  def sanitize(text)
86
- #Removes any escaped quotes, replaces them
87
- text.gsub(/"/, "'")
83
+ #Removes any escaped quotes, replaces them
84
+ text.gsub(/"/, "'").lstrip.chomp
85
+ end
86
+
87
+ def fill_content(arr, key)
88
+ #Doesn't shovel if there is no content found for the specific tag
89
+ !arr.empty? ? (@content.fetch(key) << arr) : nil
88
90
  end
89
91
  end
90
92
 
91
- test = Wriggler.new(["character", "content", "name", "title"], "/Users/47900/Desktop/Ruby/wriggler/dirtest", false)
93
+ test = Wriggler.new(["character", "content", "name", "title", "test"], "/Users/47900/Desktop/Ruby/wriggler/dirtest", false)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wriggler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliott Young
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-06-07 00:00:00.000000000 Z
11
+ date: 2015-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -85,9 +85,11 @@ files:
85
85
  - Rakefile
86
86
  - bin/console
87
87
  - bin/setup
88
+ - dirtest/nested_fldr/test5.xml
88
89
  - dirtest/test1.xml
89
90
  - dirtest/test2.xml
90
91
  - dirtest/test3.xml
92
+ - dirtest/test4.html
91
93
  - lib/wriggler.rb
92
94
  - lib/wriggler/version.rb
93
95
  - test.rb