WebWordSorter 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/WebWordSorter.rb +249 -0
- data/lib/examples/example.rb +49 -0
- data/lib/resources/words.txt +235887 -0
- data/lib/test/WWS_test_cases.rb +145 -0
- metadata +137 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'WebWordSorter'
|
3
|
+
# Author:: John Z. Abrams (mailto:jzabrams@unm.edu)
|
4
|
+
# Copyright:: Copyright (c) 2014 John Z. Abrams
|
5
|
+
# License:: Distributed under the same terms as Ruby
|
6
|
+
|
7
|
+
##
|
8
|
+
# = WebWordSorter Unit Test Class
|
9
|
+
#
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
# This class is responsible for ensuring all methods of the WebWordSorter class are
|
13
|
+
# functioning properly. Any modifications to the current class methods should be
|
14
|
+
# tested using this class. Any additional methods added to the WebWordSorter class
|
15
|
+
# should also have a test designed for them to ensure future stability and manageable
|
16
|
+
# code.
|
17
|
+
|
18
|
+
class TestWWS < Test::Unit::TestCase
|
19
|
+
|
20
|
+
##
|
21
|
+
# ===Test Description
|
22
|
+
# This method tests that all links of a page are properly being
|
23
|
+
# collected by the anemone crawler.
|
24
|
+
# http://www.example.com is crawled in a way known to be stable and its output is compared
|
25
|
+
# to the output from the WebWordSorter class.
|
26
|
+
def test_crawl
|
27
|
+
|
28
|
+
pages = 0
|
29
|
+
|
30
|
+
Anemone.crawl("http://www.example.com") do |anemone|
|
31
|
+
anemone.on_every_page do |page|
|
32
|
+
|
33
|
+
pages = (pages + 1)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
expected = WebWordSorter.new.crawler ("http://www.example.com")
|
38
|
+
assert_equal expected.length, pages
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
##
|
43
|
+
# ===Test Description
|
44
|
+
# This method tests that all webpages are properly being converted to strings.
|
45
|
+
# The two websites used here are available for testing purposes exclusively. The combined string that
|
46
|
+
# should be returned from these sites is known, and is compared to the string that is returned by the
|
47
|
+
# WebWordSorter class.
|
48
|
+
def test_pages_to_string
|
49
|
+
|
50
|
+
test_array = ["http://129.24.149.151/test0.html", "http://129.24.149.151/test1.html" ]
|
51
|
+
test_string ="This is a test string for the caanes webpage word sort interview project.\ntest test test /!@$\n"
|
52
|
+
|
53
|
+
expected = WebWordSorter.new.pages_to_string test_array
|
54
|
+
|
55
|
+
assert_equal(expected, test_string)
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# ===Test Description
|
61
|
+
# This method tests to ensure all markup and characters are parsed correctly.
|
62
|
+
# Everything but letters and single spaces should be removed, and the remaining text returned.
|
63
|
+
# A string is given with a known output. The string is passed to the WebWordSorter
|
64
|
+
# class and what is returned is compared to the known correct output.
|
65
|
+
def test_parse_string
|
66
|
+
|
67
|
+
test_input = "Word dr. !@ %^&$( another word CAPS lowercase 1 2345 67 \n newline! oh no!"
|
68
|
+
test_string = "Word dr another word CAPS lowercase newline oh no "
|
69
|
+
|
70
|
+
expected = WebWordSorter.new.parse_string test_input
|
71
|
+
|
72
|
+
assert_equal(expected, test_string)
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
##
|
78
|
+
# ===Test Description
|
79
|
+
# This method ensures that the conversion of the string to an array is correct.
|
80
|
+
# A string is given with a known output array. The string is passed to the
|
81
|
+
# WebWordSorter class and then compared with the known correct output length
|
82
|
+
# to verify the string was properly split.
|
83
|
+
def test_spilt_uniq
|
84
|
+
|
85
|
+
test_input = "one two three three four five five five abc abc abc "
|
86
|
+
|
87
|
+
expected = WebWordSorter.new.split_uniq test_input
|
88
|
+
|
89
|
+
assert (expected.length == expected.uniq.length)
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
##
|
94
|
+
# ===Test Description
|
95
|
+
# This method ensures that only true words (as determined by the dictionary file used)
|
96
|
+
# are kept in the final array to be sorted.
|
97
|
+
# An array of words and non-words is given with an array of the known real words.
|
98
|
+
# The array of words and non words is passed to the WebWordSorter class and the
|
99
|
+
# result is compared to the array of known words. Test will fail unless they
|
100
|
+
# are identical.
|
101
|
+
def test_spell_check
|
102
|
+
|
103
|
+
test_input = ['valid','novalid','test','words','sukess','America', 'a', 'ke', 'I','o','probingisaclassofattackswhereanattackerscansanetworktogatherinformationor', 'borderradius']
|
104
|
+
test_array= ['valid','test','words','America','a','I','o']
|
105
|
+
|
106
|
+
expected = WebWordSorter.new.spell_check test_input
|
107
|
+
|
108
|
+
assert_equal(expected, test_array)
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
##
|
113
|
+
# ===Test Description
|
114
|
+
# This method ensures that the final output is sorted properly.
|
115
|
+
# An unsorted array is passed to an instance of the WebWordSorter and the output is compared
|
116
|
+
# to a known sorted version of the given array.
|
117
|
+
def test_stooge_sort
|
118
|
+
|
119
|
+
test_input = ['apple','orange','pear','grape','watermelon','fig','tomato','date']
|
120
|
+
test_array= ['fig','pear','date','grape','apple','tomato','orange', 'watermelon']
|
121
|
+
|
122
|
+
expected = WebWordSorter.new.stooge_sort test_input
|
123
|
+
|
124
|
+
assert_equal(expected, test_array)
|
125
|
+
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: WebWordSorter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Z. Abrams
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: anemone
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pdf-reader
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-progressbar
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: colorize
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: "This library contains all the necessary methods to do the
|
98
|
+
following:\n\t-Crawl most websites and return an array of their URLS.\n\t-Convert
|
99
|
+
all HTML and most web linked PDF documents to one large string given an array of
|
100
|
+
urls.\n\t-Parse out all non words and non human sensible markup.\n\t-Stooge Sort
|
101
|
+
an array of words via Iteration, NOT recursion. NOTE: Current version\n\t IS using
|
102
|
+
recursive stooge sort!\n\t-Write array of words to a new file."
|
103
|
+
email:
|
104
|
+
- jzabrams@unm.edu
|
105
|
+
executables: []
|
106
|
+
extensions: []
|
107
|
+
extra_rdoc_files: []
|
108
|
+
files:
|
109
|
+
- lib/WebWordSorter.rb
|
110
|
+
- lib/examples/example.rb
|
111
|
+
- lib/resources/words.txt
|
112
|
+
- lib/test/WWS_test_cases.rb
|
113
|
+
homepage: http://webwordsorter.info
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.2.2
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Crawl, retrieve, and sort words from websites.
|
137
|
+
test_files: []
|