siteseeker_normalizer 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,347 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
5
+ <meta name="ROBOTS" content="NOINDEX,NOFOLLOW" />
6
+ <title>Malm&ouml; stad: s&ouml;k &rdquo;barnomsrg&rdquo;</title>
7
+
8
+ <link rel="stylesheet" href="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=style.css&amp;i=sv" type="text/css" >
9
+ <link rel="stylesheet" href="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery.autocomplete.css&amp;i=sv" type="text/css" >
10
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery-1.3.2.min.js&amp;i=sv"></script>
11
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery.autocomplete.1.2.1-siteseeker.js&amp;i=sv"></script>
12
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=script.js&amp;i=sv"></script>
13
+ <script type="text/javascript">var enableQueryCompletion = true; var searchFieldId = 'essi-queryfield'; var language = 'sv'; var qcUrl = 'http://foo.appliance.siteseeker.se/qc/bar/'; var searchUrl = 'http://foo.appliance.siteseeker.se/search/bar/';</script>
14
+
15
+ <meta name="keywords" content="">
16
+ <meta name="description" content="Malmö stads officiella webbplats. Official website of the city of Malmö. Välkommen till Malmö stad! Welcome to the City of Malmö in Sweden!">
17
+ <meta http-equiv="imagetoolbar" content="no">
18
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0">
19
+ </head>
20
+ <body>
21
+ <table cellspacing="5">
22
+ <tr>
23
+ <td valign="top" width="250">
24
+ <form id="essi-mainform" method="get"
25
+ action="http://foo.appliance.siteseeker.se/search/bar/">
26
+
27
+ <div id="essi-query-block">
28
+ <label for="essi-queryfield"><strong>S&ouml;k efter:</strong></label>
29
+ <input type="text" name="q" value="barnomsrg" tabindex="1" id="essi-queryfield" autocomplete="off" >
30
+
31
+
32
+ <input type="submit" name="x" tabindex="2" id="essi-search-button"
33
+ value="Hitta!" >
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=barnomsrg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;p=helppopup&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d" tabindex="3" onclick="essOpenWindow('http://foo.appliance.siteseeker.se/search/bar/?q=barnomsrg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;p=helppopup&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d', 'siteseekerHelpPopupWindow', false, 500, 500); return false;"
42
+ id="essi-helplink" title="S&ouml;khj&auml;lp (&ouml;ppnas i nytt f&ouml;nster)">Hj&auml;lp</a>
43
+
44
+
45
+
46
+ </div>
47
+
48
+ <div id="essi-options">
49
+ <input type="hidden" name="i" value="sv" >
50
+ <input type="hidden" name="s" value="1" >
51
+ <input type="hidden" name="so" value="1" >
52
+ <input type="hidden" name="ua" value="f111c901872e2ee9f3fea7552d9dde4d" >
53
+ <input type="hidden" name="charset" value="UTF-8" >
54
+ <input type="hidden" name="oenc" value="UTF-8" >
55
+ <input type="hidden" name="origin" value="" >
56
+
57
+ <!-- CATEGORY -->
58
+
59
+ <div class="ess-group" id="essi-opt-category">
60
+ <p><input type="radio" name="c" value="0" id="essi-catall" checked="checked" >
61
+ <strong><label for="essi-catall">S&ouml;k dokument p&aring; hela webbplatsen</label></strong></p>
62
+ <p><input type="radio" name="c" value="1" id="essi-catselect" >
63
+ <label for="essi-catselect">Avgr&auml;nsa till:</label></p>
64
+ <div class="ess-option-block">
65
+ <div>
66
+
67
+ </div>
68
+ <div class="ess-cat-group-with-cats" id="essi-cg-medborgare">
69
+ <p class="ess-cat-group">Medborgare:</p>
70
+
71
+ <p>
72
+ <input type="checkbox" id="essi-cc-119" name="cc[]"
73
+ value="119" onclick="essEnableCats(this)" >
74
+ <label for="essi-cc-119">Kultur &amp; nöje</label>
75
+ </p>
76
+
77
+ <p>
78
+ <input type="checkbox" id="essi-cc-116" name="cc[]"
79
+ value="116" onclick="essEnableCats(this)" >
80
+ <label for="essi-cc-116">Social- &amp; familjefrågor</label>
81
+ </p>
82
+
83
+ <p>
84
+ <input type="checkbox" id="essi-cc-125" name="cc[]"
85
+ value="125" onclick="essEnableCats(this)" >
86
+ <label for="essi-cc-125">Biblioteken</label>
87
+ </p>
88
+
89
+ <p>
90
+ <input type="checkbox" id="essi-cc-121" name="cc[]"
91
+ value="121" onclick="essEnableCats(this)" >
92
+ <label for="essi-cc-121">Jobb &amp; praktik</label>
93
+ </p>
94
+
95
+ <p>
96
+ <input type="checkbox" id="essi-cc-123" name="cc[]"
97
+ value="123" onclick="essEnableCats(this)" >
98
+ <label for="essi-cc-123">Förskola &amp; utbildning</label>
99
+ </p>
100
+
101
+ <p>
102
+ <input type="checkbox" id="essi-cc-115" name="cc[]"
103
+ value="115" onclick="essEnableCats(this)" >
104
+ <label for="essi-cc-115">Stadsplanering &amp; trafik</label>
105
+ </p>
106
+
107
+ <p>
108
+ <input type="checkbox" id="essi-cc-118" name="cc[]"
109
+ value="118" onclick="essEnableCats(this)" >
110
+ <label for="essi-cc-118">Miljö &amp; hållbarhet</label>
111
+ </p>
112
+
113
+ <p>
114
+ <input type="checkbox" id="essi-cc-117" name="cc[]"
115
+ value="117" onclick="essEnableCats(this)" >
116
+ <label for="essi-cc-117">Omsorg, vård &amp; stöd</label>
117
+ </p>
118
+
119
+ <p>
120
+ <input type="checkbox" id="essi-cc-124" name="cc[]"
121
+ value="124" onclick="essEnableCats(this)" >
122
+ <label for="essi-cc-124">Bo &amp; bygga</label>
123
+ </p>
124
+
125
+ <p>
126
+ <input type="checkbox" id="essi-cc-122" name="cc[]"
127
+ value="122" onclick="essEnableCats(this)" >
128
+ <label for="essi-cc-122">Idrott &amp; fritid</label>
129
+ </p>
130
+
131
+ </div>
132
+ <div class="ess-cat-group-with-cats" id="essi-cg-vriga_delgrenar">
133
+ <p class="ess-cat-group">Övriga delgrenar:</p>
134
+
135
+ <p>
136
+ <input type="checkbox" id="essi-cc-114" name="cc[]"
137
+ value="114" onclick="essEnableCats(this)" >
138
+ <label for="essi-cc-114">Företagare</label>
139
+ </p>
140
+
141
+ <p>
142
+ <input type="checkbox" id="essi-cc-112" name="cc[]"
143
+ value="112" onclick="essEnableCats(this)" >
144
+ <label for="essi-cc-112">Kommun &amp; politik</label>
145
+ </p>
146
+
147
+ <p>
148
+ <input type="checkbox" id="essi-cc-113" name="cc[]"
149
+ value="113" onclick="essEnableCats(this)" >
150
+ <label for="essi-cc-113">Turist</label>
151
+ </p>
152
+
153
+ </div>
154
+ <div class="ess-cat-group-with-cats" id="essi-cg-kategori">
155
+ <p class="ess-cat-group">Kategori:</p>
156
+
157
+ <p>
158
+ <input type="checkbox" id="essi-cc-0" name="cc[]"
159
+ value="0" onclick="essEnableCats(this)" >
160
+ <label for="essi-cc-0">Övriga</label>
161
+ </p>
162
+
163
+ <p>
164
+ <input type="checkbox" id="essi-cc-110" name="cc[]"
165
+ value="110" onclick="essEnableCats(this)" >
166
+ <label for="essi-cc-110">Miljöbarometern</label>
167
+ </p>
168
+
169
+
170
+ </div>
171
+ </div>
172
+ <p class="ess-clear"></p>
173
+ </div>
174
+
175
+
176
+ <!-- FORMAT -->
177
+
178
+ <div class="ess-group" id="essi-opt-format">
179
+ <p><input type="radio" name="f" value="0" checked="checked" id="essi-filter-doctype-any" >
180
+ <strong><label for="essi-filter-doctype-any">S&ouml;k dokument av alla typer</label></strong></p>
181
+
182
+
183
+ <p><input type="radio" name="f" value="1" id="essi-filter-doctype" >
184
+ <label for="essi-filter-doctype">Endast detta format</label>:</p>
185
+ <p class="ess-option-block">
186
+ <select name="ff" onfocus="essEnableFilter('essi-filter-doctype')" title="Endast detta format"
187
+ id="essi-filter-doctype-sel" onclick="essEnableFilter('essi-filter-doctype')">
188
+ <option value="1">webbsidor</option>
189
+ <option value="3">PDF</option>
190
+ <option value="5">Word</option>
191
+ <option value="6">Excel</option>
192
+ <option value="7">PowerPoint</option>
193
+
194
+ </select>
195
+ </p>
196
+
197
+
198
+ <p><input type="radio" name="f" value="2" id="essi-filter-doctype-image" >
199
+ <label for="essi-filter-doctype-image">Bilder</label></p>
200
+
201
+ </div>
202
+
203
+
204
+ <!-- DATE -->
205
+
206
+ <div class="ess-group" id="essi-opt-date">
207
+ <p><input type="radio" name="da" value="0" checked="checked" id="essi-filter-date-any" >
208
+ <strong><label for="essi-filter-date-any">S&ouml;k dokument &auml;ndrade n&auml;r som helst</label></strong></p>
209
+ <p><input type="radio" name="da" value="1" id="essi-filter-date-week" >
210
+ <label for="essi-filter-date-week">Senaste veckan</label></p>
211
+ <p><input type="radio" name="da" value="2" id="essi-filter-date-month" >
212
+ <label for="essi-filter-date-month">Senaste m&aring;naden</label></p>
213
+ <p><input type="radio" name="da" value="3" id="essi-filter-date-year" >
214
+ <label for="essi-filter-date-year">Senaste &aring;ret</label>
215
+ <input type="hidden" name="d" value="1" >
216
+ </p>
217
+ </div>
218
+
219
+
220
+ <!-- LANGUAGE -->
221
+
222
+ <div class="ess-group" id="essi-opt-lang">
223
+ <p><input type="radio" name="l" value="0" checked="checked" id="essi-filter-lang-any" >
224
+ <strong><label for="essi-filter-lang-any">S&ouml;k dokument p&aring; alla spr&aring;k</label></strong></p>
225
+ <p><input type="radio" name="l" value="1" id="essi-filter-lang" >
226
+ <label for="essi-filter-lang">Endast p&aring;</label>:
227
+ </p>
228
+ <p class="ess-option-block">
229
+ <select name="ll" onfocus="essEnableFilter('essi-filter-lang')" title="Endast p&aring;"
230
+ id="essi-filter-lang-sel" onclick="essEnableFilter('essi-filter-lang')">
231
+ <option value="-1">alla språk</option>
232
+ <option value="5">svenska</option>
233
+ <option value="0">danska</option>
234
+ <option value="1">tyska</option>
235
+ <option value="2">engelska</option>
236
+ <option value="3">franska</option>
237
+ <option value="6">spanska</option>
238
+ <option value="11">ryska</option>
239
+ <option value="14">polska</option>
240
+ <option value="15">kroatiska</option>
241
+ <option value="17">turkiska</option>
242
+ <option value="21">rumänska</option>
243
+ <option value="40">albanska</option>
244
+
245
+ </select>
246
+ </p>
247
+ </div>
248
+
249
+
250
+ <!-- STEMMING -->
251
+
252
+
253
+ <div class="ess-group" id="essi-search-button-2-div">
254
+ <input type="submit" name="x" tabindex="2" id="essi-search-button-2"
255
+ value="Hitta!" >
256
+ </div>
257
+
258
+ </div>
259
+
260
+ </form>
261
+
262
+ </td>
263
+ <td valign="top">
264
+
265
+
266
+
267
+ <div class="ess-result">
268
+
269
+ <h2 class="ess-topcell">
270
+ <strong class="ess-header">Resultat:</strong>
271
+ <strong><span id="essi-hitcount">Inga</span></strong> <span id="essi-hitname">träffar</span> på <strong>barnomsrg</strong><span id="essi-wholesite-prep"> inom <strong id="essi-wholesite">Hela Malmö stads webbplats</strong></span>
272
+ </h2>
273
+
274
+
275
+
276
+
277
+ </div>
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+ <div class="ess-nohits">
287
+ <div class="ess-spelling">
288
+ <span class="ess-qmark">?</span>
289
+
290
+ <h3 class="ess-helpheadline">S&ouml;kfr&aring;gan kan vara felstavad</h3>
291
+
292
+ <p>Menade du <strong><strong>barnomsorg</strong></strong> eller <strong><strong>backgrounds</strong></strong>?</p>
293
+
294
+ <ul>
295
+
296
+ <li>
297
+ Jag vill s&ouml;ka efter
298
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=barnomsorg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;sc=click&amp;p=&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d"><strong>barnomsorg</strong></a>.
299
+ </li>
300
+
301
+ <li>
302
+ Jag vill s&ouml;ka efter
303
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=backgrounds&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;sc=click&amp;p=&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d"><strong>backgrounds</strong></a>.
304
+ </li>
305
+
306
+ </ul>
307
+
308
+ </div>
309
+ </div>
310
+
311
+
312
+
313
+
314
+
315
+ <dl class="ess-hits ess-hit">
316
+
317
+
318
+
319
+
320
+
321
+ </dl>
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+ <div id="essi-footer-logo">
331
+ <br >
332
+ <a href="http://www.siteseeker.se/">
333
+ <img src="http://foo.appliance.siteseeker.se/images/modular/poweredbysiteseeker.gif" alt="S&ouml;kning levererad av Euroling SiteSeeker"
334
+ style="border: none;" ></a>
335
+ </div>
336
+
337
+
338
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/click/bar/?ua=f111c901872e2ee9f3fea7552d9dde4d&amp;pageloading=1"></script>
339
+
340
+ </td>
341
+ <td valign="top">
342
+
343
+ </td>
344
+ </tr>
345
+ </table>
346
+ </body>
347
+ </html>
@@ -0,0 +1,69 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe SiteseekerNormalizer do
5
+ before(:each) do
6
+ raw_results = open("spec/fixtures/barn.html").read
7
+ @results = SiteseekerNormalizer::Parse.new(raw_results, encoding: "UTF-8")
8
+ end
9
+
10
+ it "should have a number of hits" do
11
+ @results.total.should be_a Fixnum
12
+ end
13
+
14
+ it "should have results" do
15
+ @results.entries.count.should > 0
16
+ end
17
+
18
+ describe "result entry" do
19
+ it "should have an order number" do
20
+ @results.entries.first.number.should eq 1
21
+ end
22
+
23
+ it "should have a title" do
24
+ @results.entries.first.title.should be_a String
25
+ end
26
+
27
+ it "should have an extract" do
28
+ @results.entries.first.summary.should be_a String
29
+ end
30
+
31
+ it "should have a breadcrumb" do
32
+ @results.entries.first.breadcrumbs.should be_an Array
33
+ end
34
+
35
+ it "should have a category" do
36
+ @results.entries.first.category.should be_a String
37
+ end
38
+
39
+ it "should have a date string" do
40
+ @results.entries.first.date.should be_a String
41
+ end
42
+ end
43
+
44
+ it "should have sorting" do
45
+ @results.sorting.should be_an Array
46
+ end
47
+
48
+ it "should have a first sorting entry with text" do
49
+ @results.sorting.first.text.should be_a String
50
+ end
51
+
52
+ it "should have a second sorting entry with an url" do
53
+ @results.sorting[1].query.should be_a String
54
+ end
55
+
56
+ it "should have a query string for getting more results" do
57
+ @results.more_query.should be_a String
58
+ end
59
+
60
+ it "should have a categories" do
61
+ @results.category_groups.should be_an Array
62
+ end
63
+
64
+ it "should show a spelling suggestions" do
65
+ raw_results = open("spec/fixtures/barnomsrg.html").read
66
+ results = SiteseekerNormalizer::Parse.new(raw_results, encoding: "UTF-8")
67
+ results.suggestions.count.should > 0
68
+ end
69
+ end
@@ -0,0 +1,2 @@
1
+ require "siteseeker_normalizer"
2
+ require 'nokogiri'
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification  # serialized gem specification; the !ruby/object tags are Ruby-specific YAML tags
2
+ name: siteseeker_normalizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - martent
8
+ autorequire:  # empty value
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-26 00:00:00.000000000 Z
12
+ dependencies:  # one :runtime entry (nokogiri) and three :development entries (bundler, rake, rspec)
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>  # constraint operator; applies to the version given on the following lines
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement  # repeats the same constraint as `requirement` above
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='  # '>=' paired with version '0': no effective lower bound
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='  # NOTE(review): unbounded; the packaged spec uses the `should` syntax - confirm which rspec versions still accept it by default
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Siteseeker integration library
70
+ email:
71
+ - marten@thavenius.se
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:  # every path shipped in the package
76
+ - .gitignore
77
+ - .rspec
78
+ - COPYING
79
+ - Gemfile
80
+ - README.md
81
+ - Rakefile
82
+ - lib/siteseeker_normalizer.rb
83
+ - lib/siteseeker_normalizer/client.rb
84
+ - lib/siteseeker_normalizer/parse.rb
85
+ - lib/siteseeker_normalizer/parse/category.rb
86
+ - lib/siteseeker_normalizer/parse/entry.rb
87
+ - lib/siteseeker_normalizer/version.rb
88
+ - siteseeker_normalizer.gemspec
89
+ - spec/fixtures/barn.html
90
+ - spec/fixtures/barnomsrg.html
91
+ - spec/siteseeeker_normalizer_spec.rb  # NOTE(review): "siteseeeker" has three e's - presumably matches the file on disk; verify before renaming
92
+ - spec/spec_helper.rb
93
+ homepage: https://github.com/malmostad/siteseeker_normalizer
94
+ licenses:
95
+ - AGPL v3  # NOTE(review): free-form license string, not an SPDX identifier - confirm whether an SPDX id such as AGPL-3.0 is wanted
96
+ metadata: {}
97
+ post_install_message:  # empty value
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='  # '>=' paired with version '0': no minimum Ruby version is declared
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:  # empty value
113
+ rubygems_version: 2.1.11
114
+ signing_key:  # empty value
115
+ specification_version: 4
116
+ summary: A Ruby Gem for making requests and parsing the response from Siteseeker to
117
+ a structured object.
118
+ test_files:  # the spec/ paths that are also listed under files
119
+ - spec/fixtures/barn.html
120
+ - spec/fixtures/barnomsrg.html
121
+ - spec/siteseeeker_normalizer_spec.rb
122
+ - spec/spec_helper.rb