wriggler 0.1.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5d35c4b6d7c7da4483e293b47fdbcc06a9348172
4
- data.tar.gz: 9a7372283313d57c7ab1d3d39320c544422d5b2c
3
+ metadata.gz: 1d741f12caa9d0cae037e2689e76ff0aa6adee13
4
+ data.tar.gz: d84a237b2f9c49ef76df0a1fa7db330d32d031ea
5
5
  SHA512:
6
- metadata.gz: 0cb56c3f6062e77532fe7676acb7b68140a8b2fe4b9d3b21015b77b159fbc259b295aee19082210444804c632bdc647247f7314fdb82908ea604582b82908790
7
- data.tar.gz: f8c9cbd5a96acbb701a9ecd0e5fc50c34e5a0b6ad88f56464ea6c969a2d0fa314f8ffc34dece4f71df8598571cce1db1423c1fa7b4b109f6283f71c93a522a0e
6
+ metadata.gz: 7fd896183312ea3a2f9ab5e00836dfaf12f7af1486796deedd2f80f1c3f851ae437d29d8c48566a8f89c8ff04148b51c797ad82a8518fd65fb025178f75c80c0
7
+ data.tar.gz: 0aa7b338518a6285e5bb3aac82ff401b0157a3b72f805b49f2b066851e1aa81f22a7f03924db2625ea50eece17c6492942424664e380fa2c50ab89d5dd0f0b8d
@@ -0,0 +1 @@
1
+ <test>If this appears it works</test>
data/dirtest/test1.xml CHANGED
@@ -1,6 +1,7 @@
1
1
  <root>
2
2
  <sitcoms>
3
3
  <sitcom>
4
+ <test>This is different</test>
4
5
  <name>Married with Children</name>
5
6
  <characters>
6
7
  <character>Al Bundy</character>
@@ -0,0 +1,7 @@
1
+ <div id = "buttons">
2
+ <button id="bye">Bye</button>
3
+ <button id="hello">Hello</button>
4
+ </div>
5
+ <div>
6
+ <p>1: <span id="greeting">Greeting</span></p>
7
+ </div>
@@ -1,3 +1,3 @@
1
1
  module Wriggler
2
- VERSION = "0.1.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/wriggler.rb CHANGED
@@ -1,32 +1,43 @@
1
1
  require "wriggler/version"
2
2
  require "nokogiri"
3
+ require "find"
3
4
 
4
5
  module Wriggler
5
6
  attr_reader :content, :directory
6
7
 
7
- def crawl(tags=[], directory="", subdirectories=true)
8
- @content = Hash[tags.map {|k| [k, []]}] #Hash with content
9
- @subdirectories = subdirectories #Default true for the existence of subdirs
10
- @directory = directory #Directory to grab files from
8
+ def self.crawl(tags=[], directory="")
9
+ @content = Hash[tags.map {|k| [k, []]}] #Hash with content
10
+ @directory = directory #Current top-level directory
11
11
 
12
- navigate_directory
13
- Writer.write_to_csv(@content)
14
- end
12
+ navigate_directory
13
+ Writer.write(@content)
14
+ end
15
15
 
16
16
  private
17
17
 
18
- def navigate_directory
18
+ def self.navigate_directory
19
19
  #Set the cwd to the given dir send to gather all nested files from there
20
20
  Dir.chdir(@directory)
21
- gather_files
21
+ open_files(gather_files)
22
22
  end
23
23
 
24
- def gather_files
25
- #Gathers all of the HTML or XML files from this and all subdirectories
24
+ def self.gather_files
25
+ #Gathers all of the HTML or XML files from this and all subdirectories into an array
26
+ file_array = []
27
+ Find.find(@directory) do |file|
28
+ file_array << file if file.match(/\.xml\Z/) || file.match(/\.html\Z/)
29
+ end
30
+ file_array
31
+ end
26
32
 
33
+ def self.open_files(file_array)
34
+ #Opens all the files in the file_array
35
+ file_array.each do |file|
36
+ open_next_file(file)
37
+ end
27
38
  end
28
39
 
29
- def open_next_file(file)
40
+ def self.open_next_file(file)
30
41
  #Opens the next file on the list, depending on the extension passes it to HTML or XML
31
42
  f = File.open(file)
32
43
 
@@ -37,44 +48,52 @@ module Wriggler
37
48
  end
38
49
  end
39
50
 
40
- def is_HTML?(file)
51
+ def self.is_HTML?(file)
41
52
  #Determines, using a regex check, if it is an HTML file
42
53
  file =~ /.html/
43
54
  end
44
55
 
45
- def is_XML?(file)
56
+ def self.is_XML?(file)
46
57
  #Determines, using a regex check, if it is an XML file
47
58
  file =~ /.xml/
48
59
  end
49
60
 
50
- def set_HTML(file)
61
+ def self.set_HTML(file)
51
62
  #Set the HTML file into Nokogiri for crawling
52
63
  doc = Nokogiri::HTML(file)
53
64
  crawl_file(doc)
54
65
  end
55
66
 
56
- def set_XML(file)
67
+ def self.set_XML(file)
57
68
  #Set the XML file into Nokogiri for crawling
58
69
  doc = Nokogiri::XML(file)
59
70
  crawl_file(doc)
60
71
  end
61
72
 
62
- def crawl_file(doc)
73
+ def self.crawl_file(doc)
63
74
  #Crawl the Nokogiri Object for the file
64
75
  @content.each_key do |key|
76
+ arr = []
65
77
  if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
66
- doc.xpath("//#{key}").map{ |tag| @content.fetch(key) << sanitize(tag.text) }
78
+ doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
67
79
  end
80
+ fill_content(arr, key)
68
81
  end
69
82
  end
70
83
 
71
- def sanitize(text)
84
+ def self.sanitize(text)
72
85
  #Removes any escaped quotes, replaces them
73
- text.gsub(/"/, "'")
86
+ text.gsub(/"/, "'").lstrip.chomp
87
+ end
88
+
89
+ def self.fill_content(arr, key)
90
+ #Doesn't shovel if there is no content found for the specific tag
91
+ !arr.empty? ? (@content.fetch(key) << arr) : nil
74
92
  end
75
93
  end
76
94
 
77
95
  module Writer
78
- def write_to_csv(content)
96
+ def write(content)
97
+ @content = content
79
98
  end
80
99
  end
data/test.rb CHANGED
@@ -1,91 +1,93 @@
1
1
  require "nokogiri"
2
+ require "find"
2
3
 
3
4
  class Wriggler
4
- def initialize(tags=[], directory="", subdirectories=true)
5
- @content = Hash[tags.map {|k| [k, []]}] #Hash with content
6
- @subdirectories = subdirectories #Default true for the existence of subdirs
7
- @directory = directory #Directory to grab files from
5
+ def initialize(tags=[], directory="", subdirectories=true)
6
+ @content = Hash[tags.map {|k| [k, []]}] #Hash with content
7
+ @directory = directory #Current top-level directory
8
+ @subdirectories = subdirectories #Default true for the existence of subdirs
8
9
 
9
- navigate_directory
10
- # Writer.write_to_csv(@content)
11
- end
10
+ navigate_directory
11
+ p @content
12
+ # Writer.write(@content)
13
+ end
12
14
 
13
15
  private
14
16
 
15
17
  def navigate_directory
16
- #Set the cwd to the given dir send to gather all nested files from there
17
- Dir.chdir(@directory)
18
- gather_files
18
+ #Set the cwd to the given dir send to gather all nested files from there
19
+ Dir.chdir(@directory)
20
+ open_files(gather_files)
19
21
  end
20
22
 
21
23
  def gather_files
22
- #Gathers all of the HTML or XML files from this and all subdirectories
23
- open_next_file("test1.xml")
24
- puts "=============="
25
- puts "1:"
26
- p @content
27
- puts "=============="
28
- puts ""
29
- open_next_file("test2.xml")
30
- puts "=============="
31
- puts "2:"
32
- p @content
33
- puts "=============="
34
- puts ""
35
- open_next_file("test3.xml")
36
- puts "=============="
37
- puts "3:"
38
- p @content
39
- puts "=============="
40
- puts ""
24
+ #Gathers all of the HTML or XML files from this and all subdirectories into an array
25
+ file_array = []
26
+ Find.find(@directory) do |f|
27
+ file_array << f if f.match(/\.xml\Z/) || f.match(/\.html\Z/)
28
+ end
29
+ file_array
30
+ end
31
+
32
+ def open_files(file_array)
33
+ file_array.each do |file|
34
+ open_next_file(file)
35
+ end
41
36
  end
42
37
 
43
38
  def open_next_file(file)
44
- #Opens the next file on the list, depending on the extension passes it to HTML or XML
45
- f = File.open(file)
46
-
47
- if is_html?(file)
48
- set_HTML(f)
49
- elsif is_xml?(file)
50
- set_XML(f)
51
- end
39
+ #Opens the next file on the list, depending on the extension passes it to HTML or XML
40
+ f = File.open(file)
41
+
42
+ if is_HTML?(file)
43
+ set_HTML(f)
44
+ elsif is_XML?(file)
45
+ set_XML(f)
46
+ end
52
47
  end
53
48
 
54
- def is_html?(file)
55
- #Determines, using a regex check, if it is an HTML file
56
- file =~ /.html/
49
+ def is_HTML?(file)
50
+ #Determines, using a regex check, if it is an HTML file
51
+ file =~ /.html/
57
52
  end
58
53
 
59
- def is_xml?(file)
60
- #Determines, using a regex check, if it is an XML file
61
- file =~ /.xml/
54
+ def is_XML?(file)
55
+ #Determines, using a regex check, if it is an XML file
56
+ file =~ /.xml/
62
57
  end
63
58
 
64
59
  def set_HTML(file)
65
- #Set the HTML file into Nokogiri for crawling
66
- doc = Nokogiri::HTML(file)
67
- crawl_file(doc)
60
+ #Set the HTML file into Nokogiri for crawling
61
+ doc = Nokogiri::HTML(file)
62
+ crawl_file(doc)
68
63
  end
69
64
 
70
65
  def set_XML(file)
71
- #Set the XML file into Nokogiri for crawling
72
- doc = Nokogiri::XML(file)
73
- crawl_file(doc)
66
+ #Set the XML file into Nokogiri for crawling
67
+ doc = Nokogiri::XML(file)
68
+ crawl_file(doc)
74
69
  end
75
70
 
76
71
  def crawl_file(doc)
77
- #Crawl the Nokogiri Object for the file
78
- @content.each_key do |key|
79
- if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
80
- doc.xpath("//#{key}").map{ |tag| @content.fetch(key) << sanitize(tag.text) }
81
- end
82
- end
72
+ #Crawl the Nokogiri Object for the file
73
+ @content.each_key do |key|
74
+ arr = []
75
+ if !doc.xpath("//#{key}").empty? #Returns an empty array if tag is not present
76
+ doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
77
+ end
78
+ fill_content(arr, key)
79
+ end
83
80
  end
84
81
 
85
82
  def sanitize(text)
86
- #Removes any escaped quotes, replaces them
87
- text.gsub(/"/, "'")
83
+ #Removes any escaped quotes, replaces them
84
+ text.gsub(/"/, "'").lstrip.chomp
85
+ end
86
+
87
+ def fill_content(arr, key)
88
+ #Doesn't shovel if there is no content found for the specific tag
89
+ !arr.empty? ? (@content.fetch(key) << arr) : nil
88
90
  end
89
91
  end
90
92
 
91
- test = Wriggler.new(["character", "content", "name", "title"], "/Users/47900/Desktop/Ruby/wriggler/dirtest", false)
93
+ test = Wriggler.new(["character", "content", "name", "title", "test"], "/Users/47900/Desktop/Ruby/wriggler/dirtest", false)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wriggler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliott Young
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-06-07 00:00:00.000000000 Z
11
+ date: 2015-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -85,9 +85,11 @@ files:
85
85
  - Rakefile
86
86
  - bin/console
87
87
  - bin/setup
88
+ - dirtest/nested_fldr/test5.xml
88
89
  - dirtest/test1.xml
89
90
  - dirtest/test2.xml
90
91
  - dirtest/test3.xml
92
+ - dirtest/test4.html
91
93
  - lib/wriggler.rb
92
94
  - lib/wriggler/version.rb
93
95
  - test.rb