WebWordSorter 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,145 @@
1
+ require 'test/unit'
2
+ require 'WebWordSorter'
3
+ # Author:: John Z. Abrams (mailto:jzabrams@unm.edu)
4
+ # Copyright:: Copyright (c) 2014 John Z. Abrams
5
+ # License:: Distributed under the same terms as Ruby
6
+
7
+ ##
8
+ # = WebWordSorter Unit Test Class
9
+ #
10
+ #
11
+ # == Description
12
+ # This class is responsible for ensuring all methods of the WebWordSorter class are
13
+ # functioning properly. Any modifications to the current class methods should be
14
+ # tested using this class. Any additional methods added to the WebWordSorter class
15
+ # should also have a test designed for them to ensure future stability and manageable
16
+ # code.
17
+
18
+ class TestWWS < Test::Unit::TestCase
19
+
20
+ ##
21
+ # ===Test Description
22
+ # This method tests that all links of a page are properly being
23
+ # collected by the anemone crawler.
24
+ # http://www.example.com is crawled in a way known to be stable and its output is compared
25
+ # to the output from the WebWordSorter class.
26
+ def test_crawl
27
+
28
+ pages = 0
29
+
30
+ Anemone.crawl("http://www.example.com") do |anemone|
31
+ anemone.on_every_page do |page|
32
+
33
+ pages = (pages + 1)
34
+ end
35
+ end
36
+
37
+ expected = WebWordSorter.new.crawler ("http://www.example.com")
38
+ assert_equal expected.length, pages
39
+
40
+ end
41
+
42
+ ##
43
+ # ===Test Description
44
+ # This method tests that all webpages are properly being converted to strings.
45
+ # The two websites used here are available for testing purposes exclusively. The combined string that
46
+ # should be returned from these sites is known, and is compared to the string that is returned by the
47
+ # WebWordSorter class.
48
+ def test_pages_to_string
49
+
50
+ test_array = ["http://129.24.149.151/test0.html", "http://129.24.149.151/test1.html" ]
51
+ test_string ="This is a test string for the caanes webpage word sort interview project.\ntest test test /!@$\n"
52
+
53
+ expected = WebWordSorter.new.pages_to_string test_array
54
+
55
+ assert_equal(expected, test_string)
56
+
57
+ end
58
+
59
+ ##
60
+ # ===Test Description
61
+ # This method tests to ensure all markup and characters are parsed correctly.
62
+ # Everything but letters and single spaces should be removed before the string is returned.
63
+ # A string is given with a known output. The string is passed to the WebWordSorter
64
+ # class and what is returned is compared to the known correct output.
65
+ def test_parse_string
66
+
67
+ test_input = "Word dr. !@ %^&$( another word CAPS lowercase 1 2345 67 \n newline! oh no!"
68
+ test_string = "Word dr another word CAPS lowercase newline oh no "
69
+
70
+ expected = WebWordSorter.new.parse_string test_input
71
+
72
+ assert_equal(expected, test_string)
73
+
74
+ end
75
+
76
+
77
+ ##
78
+ # ===Test Description
79
+ # This method ensures that the conversion of the string to an array is correct.
80
+ # A string is given with a known output array. The string is passed to the
81
+ # WebWordSorter class and then compared with the known correct output length
82
+ # to verify the string was properly split.
83
+ def test_spilt_uniq
84
+
85
+ test_input = "one two three three four five five five abc abc abc "
86
+
87
+ expected = WebWordSorter.new.split_uniq test_input
88
+
89
+ assert (expected.length == expected.uniq.length)
90
+
91
+ end
92
+
93
+ ##
94
+ # ===Test Description
95
+ # This method ensures that only true words (as determined by the dictionary file used)
96
+ # are kept in the final array to be sorted.
97
+ # An array of words and non words is given with an array of the known real words.
98
+ # The array of words and non words is passed to the WebWordSorter class and the
99
+ # result is compared to the array of known words. Test will fail unless they
100
+ # are identical.
101
+ def test_spell_check
102
+
103
+ test_input = ['valid','novalid','test','words','sukess','America', 'a', 'ke', 'I','o','probingisaclassofattackswhereanattackerscansanetworktogatherinformationor', 'borderradius']
104
+ test_array= ['valid','test','words','America','a','I','o']
105
+
106
+ expected = WebWordSorter.new.spell_check test_input
107
+
108
+ assert_equal(expected, test_array)
109
+ end
110
+
111
+
112
+ ##
113
+ # ===Test Description
114
+ # This method ensures that the final output is sorted properly.
115
+ # An unsorted array is passed to an instance of the WebWordSorter and the output is compared
116
+ # to a known sorted version of the given array.
117
+ def test_stooge_sort
118
+
119
+ test_input = ['apple','orange','pear','grape','watermelon','fig','tomato','date']
120
+ test_array= ['fig','pear','date','grape','apple','tomato','orange', 'watermelon']
121
+
122
+ expected = WebWordSorter.new.stooge_sort test_input
123
+
124
+ assert_equal(expected, test_array)
125
+
126
+ end
127
+
128
+ end
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
metadata ADDED
@@ -0,0 +1,137 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: WebWordSorter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - John Z. Abrams
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: anemone
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pdf-reader
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: ruby-progressbar
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: colorize
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: "This library # This class contains all the necessary methods to do the
98
+ following:\n\t-Crawl most websites and return an array of their URLS.\n\t-Convert
99
+ all HTML and most web linked PDF documents to one large string given an array of
100
+ urls.\n\t-Parse out all non words and non human sensible markup.\n\t-Stooge Sort
101
+ an array of words via Iteration, NOT recursion. NOTE: Current version\n\t IS using
102
+ recursive stooge sort!\n\t-Write array of words to a new file."
103
+ email:
104
+ - jzabrams@unm.edu
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - lib/WebWordSorter.rb
110
+ - lib/examples/example.rb
111
+ - lib/resources/words.txt
112
+ - lib/test/WWS_test_cases.rb
113
+ homepage: http://webwordsorter.info
114
+ licenses:
115
+ - MIT
116
+ metadata: {}
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ required_rubygems_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - '>='
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ requirements: []
132
+ rubyforge_project:
133
+ rubygems_version: 2.2.2
134
+ signing_key:
135
+ specification_version: 4
136
+ summary: Crawl, retrieve and sort words from websites.
137
+ test_files: []