WebWordSorter 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,145 @@
1
+ require 'test/unit'
2
+ require 'WebWordSorter'
3
+ # Author:: John Z. Abrams (mailto:jzabrams@unm.edu)
4
+ # Copyright:: Copyright (c) 2014 John Z. Abrams
5
+ # License:: Distributed under the same terms as Ruby
6
+
7
+ ##
8
+ # = WebWordSorter Unit Test Class
9
+ #
10
+ #
11
+ # == Description
12
+ # This class is responsible for ensuring all methods of the WebWordSorter class are
13
+ # functioning properly. Any modifications to the current class methods should be
14
+ # tested using this class. Any additional methods added to the WebWordSorter class
15
+ # should also have a test designed for them to ensure future stability, and manageable
16
+ # code.
17
+
18
class TestWWS < Test::Unit::TestCase
  ##
  # ===Test Description
  # Checks that the crawler collects every page of a site.
  # http://www.example.com is crawled directly with Anemone, counting one page
  # per +on_every_page+ callback, and that count is compared to the length of
  # the value returned by WebWordSorter#crawler for the same URL.
  #
  # NOTE: this test performs a live crawl and therefore needs network access.
  def test_crawl
    url = 'http://www.example.com'
    page_count = 0

    Anemone.crawl(url) do |anemone|
      anemone.on_every_page { |_page| page_count += 1 }
    end

    crawled = WebWordSorter.new.crawler(url)

    # Reference value first, value under test second, so that a failure
    # message labels the two numbers correctly.
    assert_equal(page_count, crawled.length)
  end

  ##
  # ===Test Description
  # Checks that web pages are properly converted to a single string.
  # The two pages used here are available exclusively for testing purposes.
  # The combined string that should be produced from them is known, and is
  # compared to the string returned by WebWordSorter#pages_to_string.
  #
  # NOTE: the fixture pages are addressed by a hard-coded IP, so this test
  # needs that host to be reachable.
  def test_pages_to_string
    test_array = ["http://129.24.149.151/test0.html", "http://129.24.149.151/test1.html"]
    test_string = "This is a test string for the caanes webpage word sort interview project.\ntest test test /!@$\n"

    actual = WebWordSorter.new.pages_to_string(test_array)

    assert_equal(test_string, actual)
  end

  ##
  # ===Test Description
  # Checks that markup and special characters are parsed out correctly.
  # Everything but letters and single spaces should be removed.
  # A string with a known correct output is passed to
  # WebWordSorter#parse_string and the result is compared to that output.
  def test_parse_string
    test_input = "Word dr. !@ %^&$( another word CAPS lowercase 1 2345 67 \n newline! oh no!"
    test_string = "Word dr another word CAPS lowercase newline oh no "

    actual = WebWordSorter.new.parse_string(test_input)

    assert_equal(test_string, actual)
  end

  ##
  # ===Test Description
  # Checks that converting a string to an array of words removes duplicates.
  # A string containing repeated words is passed to WebWordSorter#split_uniq;
  # the result must contain at least one word and must contain no duplicates.
  #
  # The method name keeps its historical spelling ("spilt") so that existing
  # name-based test filters continue to match.
  def test_spilt_uniq
    test_input = "one two three three four five five five abc abc abc "

    words = WebWordSorter.new.split_uniq(test_input)

    # An empty result would satisfy the uniqueness check below vacuously even
    # though the input clearly contains words, so reject it explicitly.
    assert(!words.empty?, 'split_uniq returned no words for a non-empty input')
    # assert_equal reports both lengths on failure, unlike a bare boolean assert.
    assert_equal(words.uniq.length, words.length)
  end

  ##
  # ===Test Description
  # Checks that only true words (as determined by the dictionary file used)
  # are kept in the array that will later be sorted.
  # An array mixing words and non-words is passed to
  # WebWordSorter#spell_check and the result is compared to the array of
  # known real words. The test fails unless they are identical.
  def test_spell_check
    test_input = ['valid', 'novalid', 'test', 'words', 'sukess', 'America', 'a', 'ke', 'I', 'o',
                  'probingisaclassofattackswhereanattackerscansanetworktogatherinformationor',
                  'borderradius']
    test_array = ['valid', 'test', 'words', 'America', 'a', 'I', 'o']

    actual = WebWordSorter.new.spell_check(test_input)

    assert_equal(test_array, actual)
  end

  ##
  # ===Test Description
  # Checks that the final output is sorted properly.
  # An unsorted array is passed to WebWordSorter#stooge_sort and the output is
  # compared to a known sorted version of the same array.
  # NOTE(review): the expected fixture appears to be ordered by word length
  # rather than alphabetically - confirm against the sorter's documentation.
  def test_stooge_sort
    test_input = ['apple', 'orange', 'pear', 'grape', 'watermelon', 'fig', 'tomato', 'date']
    test_array = ['fig', 'pear', 'date', 'grape', 'apple', 'tomato', 'orange', 'watermelon']

    actual = WebWordSorter.new.stooge_sort(test_input)

    assert_equal(test_array, actual)
  end
end
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
metadata ADDED
@@ -0,0 +1,137 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: WebWordSorter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - John Z. Abrams
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: anemone
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pdf-reader
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: ruby-progressbar
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: colorize
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: "This library # This class contains all the necessary methods to do the
98
+ following:\n\t-Crawl most websites and return an array of their URLS.\n\t-Convert
99
+ all HTML and most web linked PDF documents to one large string given an array of
100
+ urls.\n\t-Parse out all non words and non human sensible markup.\n\t-Stooge Sort
101
+ an array of words via Iteration, NOT recursion. NOTE: Current version\n\t IS using
102
+ recursive stooge sort!\n\t-Write array of words to a new file."
103
+ email:
104
+ - jzabrams@unm.edu
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - lib/WebWordSorter.rb
110
+ - lib/examples/example.rb
111
+ - lib/resources/words.txt
112
+ - lib/test/WWS_test_cases.rb
113
+ homepage: http://webwordsorter.info
114
+ licenses:
115
+ - MIT
116
+ metadata: {}
117
+ post_install_message:
118
+ rdoc_options: []
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ required_rubygems_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - '>='
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ requirements: []
132
+ rubyforge_project:
133
+ rubygems_version: 2.2.2
134
+ signing_key:
135
+ specification_version: 4
136
+ summary: Crawl, retrieve and sort words from websites.
137
+ test_files: []