WebWordSorter 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/WebWordSorter.rb +249 -0
- data/lib/examples/example.rb +49 -0
- data/lib/resources/words.txt +235887 -0
- data/lib/test/WWS_test_cases.rb +145 -0
- metadata +137 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'WebWordSorter'
|
3
|
+
# Author:: John Z. Abrams (mailto:jzabrams@unm.edu)
|
4
|
+
# Copyright:: Copyright (c) 2014 John Z. Abrams
|
5
|
+
# License:: Distributed under the same terms as Ruby
|
6
|
+
|
7
|
+
##
|
8
|
+
# = WebWordSorter Unit Test Class
|
9
|
+
#
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
# This class is responsible for ensuring all methods of the WebWordSorter class are
|
13
|
+
# functioning properly. Any modifications to the current class methods should be
|
14
|
+
# tested using this class. Any additional methods added to the WebWordSorter class
|
15
|
+
# should also have a test designed for them to ensure future stability and manageable
|
16
|
+
# code.
|
17
|
+
|
18
|
+
class TestWWS < Test::Unit::TestCase
|
19
|
+
|
20
|
+
##
|
21
|
+
# ===Test Description
|
22
|
+
# This method tests that all links of a page are properly being
|
23
|
+
# collected by the anemone crawler.
|
24
|
+
# http://www.example.com is crawled in a way known to be stable and its output is compared
|
25
|
+
# to the output from the WebWordSorter class.
|
26
|
+
def test_crawl
|
27
|
+
|
28
|
+
pages = 0
|
29
|
+
|
30
|
+
Anemone.crawl("http://www.example.com") do |anemone|
|
31
|
+
anemone.on_every_page do |page|
|
32
|
+
|
33
|
+
pages = (pages + 1)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
expected = WebWordSorter.new.crawler ("http://www.example.com")
|
38
|
+
assert_equal expected.length, pages
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
##
|
43
|
+
# ===Test Description
|
44
|
+
# This method tests that all webpages are properly being converted to strings.
|
45
|
+
# The two websites used here are available for testing purposes exclusively. The combined string that
|
46
|
+
# should be returned from these sites is known, and is compared to the string that is returned by the
|
47
|
+
# WebWordSorter class.
|
48
|
+
def test_pages_to_string
|
49
|
+
|
50
|
+
test_array = ["http://129.24.149.151/test0.html", "http://129.24.149.151/test1.html" ]
|
51
|
+
test_string ="This is a test string for the caanes webpage word sort interview project.\ntest test test /!@$\n"
|
52
|
+
|
53
|
+
expected = WebWordSorter.new.pages_to_string test_array
|
54
|
+
|
55
|
+
assert_equal(expected, test_string)
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# ===Test Description
|
61
|
+
# This method tests to ensure all markup and characters are parsed correctly.
|
62
|
+
# Everything but letters and single spaces should be removed before the string is returned.
|
63
|
+
# A string is given with a known output. The string is passed to the WebWordSorter
|
64
|
+
# class and what is returned is compared to the known correct output.
|
65
|
+
def test_parse_string
|
66
|
+
|
67
|
+
test_input = "Word dr. !@ %^&$( another word CAPS lowercase 1 2345 67 \n newline! oh no!"
|
68
|
+
test_string = "Word dr another word CAPS lowercase newline oh no "
|
69
|
+
|
70
|
+
expected = WebWordSorter.new.parse_string test_input
|
71
|
+
|
72
|
+
assert_equal(expected, test_string)
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
##
|
78
|
+
# ===Test Description
|
79
|
+
# This method ensures that the conversion of the string to an array is correct.
|
80
|
+
# A string is given with a known output array. The string is passed to the
|
81
|
+
# WebWordSorter class and then compared with the known correct output length
|
82
|
+
# to verify the string was properly split.
|
83
|
+
def test_spilt_uniq
|
84
|
+
|
85
|
+
test_input = "one two three three four five five five abc abc abc "
|
86
|
+
|
87
|
+
expected = WebWordSorter.new.split_uniq test_input
|
88
|
+
|
89
|
+
assert (expected.length == expected.uniq.length)
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
##
|
94
|
+
# ===Test Description
|
95
|
+
# This method ensures that only true words (as determined by the dictionary file used)
|
96
|
+
# are kept in the final array to be sorted.
|
97
|
+
# An array of words and non words is given with an array of the known real words.
|
98
|
+
# The array of words and non words is passed to the WebWordSorter class and the
|
99
|
+
# result is compared to the array of known words. Test will fail unless they
|
100
|
+
# are identical.
|
101
|
+
def test_spell_check
|
102
|
+
|
103
|
+
test_input = ['valid','novalid','test','words','sukess','America', 'a', 'ke', 'I','o','probingisaclassofattackswhereanattackerscansanetworktogatherinformationor', 'borderradius']
|
104
|
+
test_array= ['valid','test','words','America','a','I','o']
|
105
|
+
|
106
|
+
expected = WebWordSorter.new.spell_check test_input
|
107
|
+
|
108
|
+
assert_equal(expected, test_array)
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
##
|
113
|
+
# ===Test Description
|
114
|
+
# This method ensures that the final output is sorted properly.
|
115
|
+
# An unsorted array is passed to an instance of the WebWordSorter and the output is compared
|
116
|
+
# to a known sorted version of the given array.
|
117
|
+
def test_stooge_sort
|
118
|
+
|
119
|
+
test_input = ['apple','orange','pear','grape','watermelon','fig','tomato','date']
|
120
|
+
test_array= ['fig','pear','date','grape','apple','tomato','orange', 'watermelon']
|
121
|
+
|
122
|
+
expected = WebWordSorter.new.stooge_sort test_input
|
123
|
+
|
124
|
+
assert_equal(expected, test_array)
|
125
|
+
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
metadata
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: WebWordSorter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Z. Abrams
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: anemone
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pdf-reader
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-progressbar
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: colorize
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: "This library # This class contains all the necessary methods to do the
|
98
|
+
following:\n\t-Crawl most websites and return an array of their URLS.\n\t-Convert
|
99
|
+
all HTML and most web linked PDF documents to one large string given an array of
|
100
|
+
urls.\n\t-Parse out all non words and non human sensible markup.\n\t-Stooge Sort
|
101
|
+
an array of words via Iteration, NOT recursion. NOTE: Current verison\n\t IS using
|
102
|
+
recursive stooge sort!\n\t-Write array of words to a new file."
|
103
|
+
email:
|
104
|
+
- jzabrams@unm.edu
|
105
|
+
executables: []
|
106
|
+
extensions: []
|
107
|
+
extra_rdoc_files: []
|
108
|
+
files:
|
109
|
+
- lib/WebWordSorter.rb
|
110
|
+
- lib/examples/example.rb
|
111
|
+
- lib/resources/words.txt
|
112
|
+
- lib/test/WWS_test_cases.rb
|
113
|
+
homepage: http://webwordsorter.info
|
114
|
+
licenses:
|
115
|
+
- MIT
|
116
|
+
metadata: {}
|
117
|
+
post_install_message:
|
118
|
+
rdoc_options: []
|
119
|
+
require_paths:
|
120
|
+
- lib
|
121
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
requirements: []
|
132
|
+
rubyforge_project:
|
133
|
+
rubygems_version: 2.2.2
|
134
|
+
signing_key:
|
135
|
+
specification_version: 4
|
136
|
+
summary: Crawl retreive and sort words from websites.
|
137
|
+
test_files: []
|