wriggler 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fb5bcfb711baec8080be58ec329c95066d4cbee4
4
- data.tar.gz: 3ff6eb28fd6f06f27398d48f4ada9d9808b73d86
3
+ metadata.gz: 6d5737be7d530672feb74afaf6c76a114e162fd0
4
+ data.tar.gz: 52cf928fe43f502489440a5c2fe71a3d204fc858
5
5
  SHA512:
6
- metadata.gz: 1f5fab9e467d49fd8b4f5806381501dccc6c08866af5f756c976ab1c12f42421125c90e79e0999397058b2d4162386bdee03e9a5091ca5e9043e8e8c209489e4
7
- data.tar.gz: 48f151ba4e2e2c42853f2fb06670ead00b7f44aa9ed660882c70ea02e6618862e2804408786eaa5197942c201610749ec5123a95d1a12727f6982e576f0a5822
6
+ metadata.gz: f5da62668ff4fb22d91f34632f06ddb89aa21d3cbee37bf9519006066cf06f688992dfe313c7dca59ad209321e6f1e8552485fbfe3568305d9a004419d1b4885
7
+ data.tar.gz: d437debf056c35d0ebe570776a4db226c8ef00d31d503c467046c5feca5d6a68f1bec680bf258ea949e17cbbcd99a1bf2fbbeebb7f6c03ed2d013b2dc39c933a
@@ -1,6 +1,7 @@
1
1
  require "wriggler/version"
2
2
  require "nokogiri"
3
3
  require "find"
4
+ require "utf8_utils"
4
5
 
5
6
  module Wriggler
6
7
  attr_reader :content, :directory
@@ -17,7 +18,7 @@ module Wriggler
17
18
 
18
19
  def self.navigate_directory
19
20
  #Set the cwd to the given dir send to gather all nested files from there
20
- Dir.chdir(@directory)
21
+ Dir.chdir(@directory)
21
22
  gather_files
22
23
  end
23
24
 
@@ -38,8 +39,6 @@ module Wriggler
38
39
  set_HTML(f)
39
40
  elsif is_XML?(file)
40
41
  set_XML(f)
41
- elsif is_TXT?(file)
42
- set_TXT(f)
43
42
  end
44
43
  end
45
44
 
@@ -53,11 +52,6 @@ module Wriggler
53
52
  file =~ /.xml/
54
53
  end
55
54
 
56
- def self.is_TXT?(file)
57
- #Determines, using a regex check, if it is a TXT file
58
- file =~ /.txt/
59
- end
60
-
61
55
  def self.set_HTML(file)
62
56
  #Set the HTML file into Nokogiri for crawling
63
57
  doc = Nokogiri::HTML(file)
@@ -70,49 +64,18 @@ module Wriggler
70
64
  crawl_file(doc)
71
65
  end
72
66
 
73
- def self.set_TXT(file)
74
- #Set the TXT file into a readable String for Regex checking
75
- doc = File.read(file)
76
- txt_content(doc)
77
- end
78
-
79
67
  def self.crawl_file(doc)
80
68
  #Crawl the Nokogiri Object for the file
81
69
  @content.each_key do |key|
82
70
  arr = []
83
- if !doc.xpath("//#{key}").empty?
84
- doc.xpath("//#{key}").map{ |tag| arr << sanitize(tag.text) }
85
- elsif key == "html"
86
- arr << "#{doc}"
87
- else
88
- arr << ""
89
- end
90
- @content.fetch(key) << arr
91
- end
92
- end
93
-
94
- def self.txt_content(doc)
95
- #Now run through the raw text and regex out what is inbetween the tags
96
- @content.each_key do |key|
97
- arr = []
98
- if key == "html"
71
+ if !doc.css("#{key}").empty?
72
+ doc.css("#{key}").map{ |tag| arr << sanitize(tag.text) }
73
+ elsif key == "html" || key == "xml"
99
74
  arr << "#{doc}"
100
- elsif contains_key(doc, key)
101
- arr << doc.slice(/<#{key}>(.*)<\/#{key}>/).gsub(/<\/?\w+>/, "")
102
75
  else
103
76
  arr << ""
104
77
  end
105
78
  @content.fetch(key) << arr
106
79
  end
107
80
  end
108
-
109
- def self.contains_key(doc, key)
110
- #Checks if the String contains the necessary tags
111
- doc.include?("<#{key}>") && doc.include?("</#{key}>")
112
- end
113
-
114
- def self.sanitize(text)
115
- #Removes any escaped quotes, replaces them
116
- text.gsub(/"/, "'").lstrip.chomp
117
- end
118
81
  end
@@ -1,3 +1,3 @@
1
1
  module Wriggler
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wriggler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliott Young
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-06-23 00:00:00.000000000 Z
11
+ date: 2016-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -99,12 +99,6 @@ files:
99
99
  - Rakefile
100
100
  - bin/console
101
101
  - bin/setup
102
- - dirtest/nested_fldr/test5.xml
103
- - dirtest/tag_content.csv
104
- - dirtest/test1.xml
105
- - dirtest/test2.xml
106
- - dirtest/test3.xml
107
- - dirtest/test4.html
108
102
  - lib/wriggler.rb
109
103
  - lib/wriggler/version.rb
110
104
  - wriggler.gemspec
@@ -129,7 +123,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
123
  version: '0'
130
124
  requirements: []
131
125
  rubyforge_project:
132
- rubygems_version: 2.4.7
126
+ rubygems_version: 2.2.2
133
127
  signing_key:
134
128
  specification_version: 4
135
129
  summary: A Gem designed to crawl through a local directory of HTML/XML files and pull
@@ -1 +0,0 @@
1
- <test>If this appears it works</test>
@@ -1,5 +0,0 @@
1
- character,test,name,sitcom
2
- "[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]","[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]","[""Al Bundy"", ""Bud Bundy"", ""Marcy Darcy"", ""Larry Appleton"", ""Balki Bartokomous"", ""John 'Hannibal' Smith"", ""Templeton 'Face' Peck"", ""'B.A.' Baracus"", ""'Howling Mad' Murdock""]"
3
- "[""If this appears it works""]","[""This is different""]"
4
- "[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]","[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]","[""Married with Children"", ""Perfect Strangers"", ""The A-Team""]"
5
- "[""This is different\n Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]","[""Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]","[""Married with Children\n \n Al Bundy\n Bud Bundy\n Marcy Darcy\n \n "", ""Perfect Strangers\n \n Larry Appleton\n Balki Bartokomous\n \n ""]"
@@ -1,31 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <test>This is different</test>
5
- <name>Married with Children</name>
6
- <characters>
7
- <character>Al Bundy</character>
8
- <character>Bud Bundy</character>
9
- <character>Marcy Darcy</character>
10
- </characters>
11
- </sitcom>
12
- <sitcom>
13
- <name>Perfect Strangers</name>
14
- <characters>
15
- <character>Larry Appleton</character>
16
- <character>Balki Bartokomous</character>
17
- </characters>
18
- </sitcom>
19
- </sitcoms>
20
- <dramas>
21
- <drama>
22
- <name>The A-Team</name>
23
- <characters>
24
- <character>John "Hannibal" Smith</character>
25
- <character>Templeton "Face" Peck</character>
26
- <character>"B.A." Baracus</character>
27
- <character>"Howling Mad" Murdock</character>
28
- </characters>
29
- </drama>
30
- </dramas>
31
- </root>
@@ -1,30 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <name>Married with Children</name>
5
- <characters>
6
- <character>Al Bundy</character>
7
- <character>Bud Bundy</character>
8
- <character>Marcy Darcy</character>
9
- </characters>
10
- </sitcom>
11
- <sitcom>
12
- <name>Perfect Strangers</name>
13
- <characters>
14
- <character>Larry Appleton</character>
15
- <character>Balki Bartokomous</character>
16
- </characters>
17
- </sitcom>
18
- </sitcoms>
19
- <dramas>
20
- <drama>
21
- <name>The A-Team</name>
22
- <characters>
23
- <character>John "Hannibal" Smith</character>
24
- <character>Templeton "Face" Peck</character>
25
- <character>"B.A." Baracus</character>
26
- <character>"Howling Mad" Murdock</character>
27
- </characters>
28
- </drama>
29
- </dramas>
30
- </root>
@@ -1,30 +0,0 @@
1
- <root>
2
- <sitcoms>
3
- <sitcom>
4
- <name>Married with Children</name>
5
- <characters>
6
- <character>Al Bundy</character>
7
- <character>Bud Bundy</character>
8
- <character>Marcy Darcy</character>
9
- </characters>
10
- </sitcom>
11
- <sitcom>
12
- <name>Perfect Strangers</name>
13
- <characters>
14
- <character>Larry Appleton</character>
15
- <character>Balki Bartokomous</character>
16
- </characters>
17
- </sitcom>
18
- </sitcoms>
19
- <dramas>
20
- <drama>
21
- <name>The A-Team</name>
22
- <characters>
23
- <character>John "Hannibal" Smith</character>
24
- <character>Templeton "Face" Peck</character>
25
- <character>"B.A." Baracus</character>
26
- <character>"Howling Mad" Murdock</character>
27
- </characters>
28
- </drama>
29
- </dramas>
30
- </root>
@@ -1,7 +0,0 @@
1
- <div id = "buttons">
2
- <button id="bye">Bye</button>
3
- <button id="hello">Hello</button>
4
- </div>
5
- <div>
6
- <p>1: <span id="greeting">Greeting</span></p>
7
- </div>