wriggler 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb5bcfb711baec8080be58ec329c95066d4cbee4
4
- data.tar.gz: 3ff6eb28fd6f06f27398d48f4ada9d9808b73d86
3
+ metadata.gz: 6d5737be7d530672feb74afaf6c76a114e162fd0
4
+ data.tar.gz: 52cf928fe43f502489440a5c2fe71a3d204fc858
5
5
  SHA512:
6
- metadata.gz: 1f5fab9e467d49fd8b4f5806381501dccc6c08866af5f756c976ab1c12f42421125c90e79e0999397058b2d4162386bdee03e9a5091ca5e9043e8e8c209489e4
7
- data.tar.gz: 48f151ba4e2e2c42853f2fb06670ead00b7f44aa9ed660882c70ea02e6618862e2804408786eaa5197942c201610749ec5123a95d1a12727f6982e576f0a5822
6
+ metadata.gz: f5da62668ff4fb22d91f34632f06ddb89aa21d3cbee37bf9519006066cf06f688992dfe313c7dca59ad209321e6f1e8552485fbfe3568305d9a004419d1b4885
7
+ data.tar.gz: d437debf056c35d0ebe570776a4db226c8ef00d31d503c467046c5feca5d6a68f1bec680bf258ea949e17cbbcd99a1bf2fbbeebb7f6c03ed2d013b2dc39c933a
@@ -1,6 +1,7 @@
1
1
  require "wriggler/version"
2
2
  require "nokogiri"
3
3
  require "find"
4
+ require "utf8_utils"
4
5
 
5
6
  module Wriggler
6
7
  attr_reader :content, :directory
@@ -17,7 +18,7 @@ module Wriggler
17
18
 
18
19
  def self.navigate_directory
19
20
  #Set the cwd to the given dir send to gather all nested files from there
20
- Dir.chdir(@directory)
21
+ Dir.chdir(@directory)
21
22
  gather_files
22
23
  end
23
24
 
@@ -38,8 +39,6 @@ module Wriggler
38
39
  set_HTML(f)
39
40
  elsif is_XML?(file)
40
41
  set_XML(f)
41
- elsif is_TXT?(file)
42
- set_TXT(f)
43
42
  end
44
43
  end
45
44
 
@@ -53,11 +52,6 @@ module Wriggler
53
52
  file =~ /.xml/
54
53
  end
55
54
 
56
- def self.is_TXT?(file)
57
- #Determines, using a regex check, if it is a TXT file
58
- file =~ /.txt/
59
- end
60
-
61
55
  def self.set_HTML(file)
62
56
  #Set the HTML file into Nokogiri for crawling
63
57
  doc = Nokogiri::HTML(file)
@@ -70,49 +64,18 @@ module Wriggler
70
64
  crawl_file(doc)
71
65
  end
72
66
 
73
- def self.set_TXT(file)
74
- #Set the TXT file into a readable String for Regex checking
75
- doc = File.read(file)
76
- txt_content(doc)
77
- end
78
-
79
67
  def self.crawl_file(doc)
80
68
  #Crawl the Nokogiri Object for the file
81
69
  @content.each_key do |key|
82
70
  arr = []
83
- if !doc.xpath("//#{key}").empty?
84
- doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
85
- elsif key == "html"
86
- arr << "#{doc}"
87
- else
88
- arr << ""
89
- end
90
- @content.fetch(key) << arr
91
- end
92
- end
93
-
94
- def self.txt_content(doc)
95
- #Now run through the raw text and regex out what is inbetween the tags
96
- @content.each_key do |key|
97
- arr = []
98
- if key == "html"
71
+ if !doc.css("#{key}").empty?
72
+ doc.css("#{key}").map{ |tag| arr << sanitize(tag.text) }
73
+ elsif key == "html" || key == "xml"
99
74
  arr << "#{doc}"
100
- elsif contains_key(doc, key)
101
- arr << doc.slice(/<#{key}>(.*)<\/#{key}>/).gsub(/<\/?\w+>/, "")
102
75
  else
103
76
  arr << ""
104
77
  end
105
78
  @content.fetch(key) << arr
106
79
  end
107
80
  end
108
-
109
- def self.contains_key(doc, key)
110
- #Checks if the String contains the necessary tags
111
- doc.include?("<#{key}>") && doc.include?("</#{key}>")
112
- end
113
-
114
- def self.sanitize(text)
115
- #Removes any escaped quotes, replaces them
116
- text.gsub(/"/, "'").lstrip.chomp
117
- end
118
81
  end
@@ -1,3 +1,3 @@
1
1
  module Wriggler
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wriggler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliott Young
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-06-23 00:00:00.000000000 Z
11
+ date: 2016-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -99,12 +99,6 @@ files:
99
99
  - Rakefile
100
100
  - bin/console
101
101
  - bin/setup
102
- - dirtest/nested_fldr/test5.xml
103
- - dirtest/tag_content.csv
104
- - dirtest/test1.xml
105
- - dirtest/test2.xml
106
- - dirtest/test3.xml
107
- - dirtest/test4.html
108
102
  - lib/wriggler.rb
109
103
  - lib/wriggler/version.rb
110
104
  - wriggler.gemspec
@@ -129,7 +123,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
123
  version: '0'
130
124
  requirements: []
131
125
  rubyforge_project:
132
- rubygems_version: 2.4.7
126
+ rubygems_version: 2.2.2
133
127
  signing_key:
134
128
  specification_version: 4
135
129
  summary: A Gem designed to crawl through a local directory of HTML/XML files and pull
@@ -1 +0,0 @@
1
- <test>If this appears it works</test>
@@ -1,5 +0,0 @@
1
- character,test,name,sitcom
2
- "[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]","[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]","[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]"
3
- "[""If this appears it works""]","[""This is different""]"
4
- "[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]","[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]","[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]"
5
- "[""This is different\n Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]","[""Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]","[""Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]"
@@ -1,31 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <test>This is different</test>
5
- <name>Married with Children</name>
6
- <characters>
7
- <character>Al Bundy</character>
8
- <character>Bud Bundy</character>
9
- <character>Marcy Darcy</character>
10
- </characters>
11
- </sitcom>
12
- <sitcom>
13
- <name>Perfect Strangers</name>
14
- <characters>
15
- <character>Larry Appleton</character>
16
- <character>Balki Bartokomous</character>
17
- </characters>
18
- </sitcom>
19
- </sitcoms>
20
- <dramas>
21
- <drama>
22
- <name>The A-Team</name>
23
- <characters>
24
- <character>John "Hannibal" Smith</character>
25
- <character>Templeton "Face" Peck</character>
26
- <character>"B.A." Baracus</character>
27
- <character>"Howling Mad" Murdock</character>
28
- </characters>
29
- </drama>
30
- </dramas>
31
- </root>
@@ -1,30 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <name>Married with Children</name>
5
- <characters>
6
- <character>Al Bundy</character>
7
- <character>Bud Bundy</character>
8
- <character>Marcy Darcy</character>
9
- </characters>
10
- </sitcom>
11
- <sitcom>
12
- <name>Perfect Strangers</name>
13
- <characters>
14
- <character>Larry Appleton</character>
15
- <character>Balki Bartokomous</character>
16
- </characters>
17
- </sitcom>
18
- </sitcoms>
19
- <dramas>
20
- <drama>
21
- <name>The A-Team</name>
22
- <characters>
23
- <character>John "Hannibal" Smith</character>
24
- <character>Templeton "Face" Peck</character>
25
- <character>"B.A." Baracus</character>
26
- <character>"Howling Mad" Murdock</character>
27
- </characters>
28
- </drama>
29
- </dramas>
30
- </root>
@@ -1,30 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <name>Married with Children</name>
5
- <characters>
6
- <character>Al Bundy</character>
7
- <character>Bud Bundy</character>
8
- <character>Marcy Darcy</character>
9
- </characters>
10
- </sitcom>
11
- <sitcom>
12
- <name>Perfect Strangers</name>
13
- <characters>
14
- <character>Larry Appleton</character>
15
- <character>Balki Bartokomous</character>
16
- </characters>
17
- </sitcom>
18
- </sitcoms>
19
- <dramas>
20
- <drama>
21
- <name>The A-Team</name>
22
- <characters>
23
- <character>John "Hannibal" Smith</character>
24
- <character>Templeton "Face" Peck</character>
25
- <character>"B.A." Baracus</character>
26
- <character>"Howling Mad" Murdock</character>
27
- </characters>
28
- </drama>
29
- </dramas>
30
- </root>
@@ -1,7 +0,0 @@
1
- <div id = "buttons">
2
- <button id="bye">Bye</button>
3
- <button id="hello">Hello</button>
4
- </div>
5
- <div>
6
- <p>1: <span id="greeting">Greeting</span></p>
7
- </div>