siteseeker_normalizer 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,347 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
5
+ <meta name="ROBOTS" content="NOINDEX,NOFOLLOW" />
6
+ <title>Malm&ouml; stad: s&ouml;k &rdquo;barnomsrg&rdquo;</title>
7
+
8
+ <link rel="stylesheet" href="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=style.css&amp;i=sv" type="text/css" >
9
+ <link rel="stylesheet" href="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery.autocomplete.css&amp;i=sv" type="text/css" >
10
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery-1.3.2.min.js&amp;i=sv"></script>
11
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=jquery.autocomplete.1.2.1-siteseeker.js&amp;i=sv"></script>
12
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/search/bar/?p=ext&amp;theme=modular&amp;doc=script.js&amp;i=sv"></script>
13
+ <script type="text/javascript">var enableQueryCompletion = true; var searchFieldId = 'essi-queryfield'; var language = 'sv'; var qcUrl = 'http://foo.appliance.siteseeker.se/qc/bar/'; var searchUrl = 'http://foo.appliance.siteseeker.se/search/bar/';</script>
14
+
15
+ <meta name="keywords" content="">
16
+ <meta name="description" content="Malmö stads officiella webbplats. Official website of the city of Malmö. Välkommen till Malmö stad! Welcome to the City of Malmö in Sweden!">
17
+ <meta http-equiv="imagetoolbar" content="no">
18
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0">
19
+ </head>
20
+ <body>
21
+ <table cellspacing="5">
22
+ <tr>
23
+ <td valign="top" width="250">
24
+ <form id="essi-mainform" method="get"
25
+ action="http://foo.appliance.siteseeker.se/search/bar/">
26
+
27
+ <div id="essi-query-block">
28
+ <label for="essi-queryfield"><strong>S&ouml;k efter:</strong></label>
29
+ <input type="text" name="q" value="barnomsrg" tabindex="1" id="essi-queryfield" autocomplete="off" >
30
+
31
+
32
+ <input type="submit" name="x" tabindex="2" id="essi-search-button"
33
+ value="Hitta!" >
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=barnomsrg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;p=helppopup&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d" tabindex="3" onclick="essOpenWindow('http://foo.appliance.siteseeker.se/search/bar/?q=barnomsrg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;p=helppopup&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d', 'siteseekerHelpPopupWindow', false, 500, 500); return false;"
42
+ id="essi-helplink" title="S&ouml;khj&auml;lp (&ouml;ppnas i nytt f&ouml;nster)">Hj&auml;lp</a>
43
+
44
+
45
+
46
+ </div>
47
+
48
+ <div id="essi-options">
49
+ <input type="hidden" name="i" value="sv" >
50
+ <input type="hidden" name="s" value="1" >
51
+ <input type="hidden" name="so" value="1" >
52
+ <input type="hidden" name="ua" value="f111c901872e2ee9f3fea7552d9dde4d" >
53
+ <input type="hidden" name="charset" value="UTF-8" >
54
+ <input type="hidden" name="oenc" value="UTF-8" >
55
+ <input type="hidden" name="origin" value="" >
56
+
57
+ <!-- CATEGORY -->
58
+
59
+ <div class="ess-group" id="essi-opt-category">
60
+ <p><input type="radio" name="c" value="0" id="essi-catall" checked="checked" >
61
+ <strong><label for="essi-catall">S&ouml;k dokument p&aring; hela webbplatsen</label></strong></p>
62
+ <p><input type="radio" name="c" value="1" id="essi-catselect" >
63
+ <label for="essi-catselect">Avgr&auml;nsa till:</label></p>
64
+ <div class="ess-option-block">
65
+ <div>
66
+
67
+ </div>
68
+ <div class="ess-cat-group-with-cats" id="essi-cg-medborgare">
69
+ <p class="ess-cat-group">Medborgare:</p>
70
+
71
+ <p>
72
+ <input type="checkbox" id="essi-cc-119" name="cc[]"
73
+ value="119" onclick="essEnableCats(this)" >
74
+ <label for="essi-cc-119">Kultur &amp; nöje</label>
75
+ </p>
76
+
77
+ <p>
78
+ <input type="checkbox" id="essi-cc-116" name="cc[]"
79
+ value="116" onclick="essEnableCats(this)" >
80
+ <label for="essi-cc-116">Social- &amp; familjefrågor</label>
81
+ </p>
82
+
83
+ <p>
84
+ <input type="checkbox" id="essi-cc-125" name="cc[]"
85
+ value="125" onclick="essEnableCats(this)" >
86
+ <label for="essi-cc-125">Biblioteken</label>
87
+ </p>
88
+
89
+ <p>
90
+ <input type="checkbox" id="essi-cc-121" name="cc[]"
91
+ value="121" onclick="essEnableCats(this)" >
92
+ <label for="essi-cc-121">Jobb &amp; praktik</label>
93
+ </p>
94
+
95
+ <p>
96
+ <input type="checkbox" id="essi-cc-123" name="cc[]"
97
+ value="123" onclick="essEnableCats(this)" >
98
+ <label for="essi-cc-123">Förskola &amp; utbildning</label>
99
+ </p>
100
+
101
+ <p>
102
+ <input type="checkbox" id="essi-cc-115" name="cc[]"
103
+ value="115" onclick="essEnableCats(this)" >
104
+ <label for="essi-cc-115">Stadsplanering &amp; trafik</label>
105
+ </p>
106
+
107
+ <p>
108
+ <input type="checkbox" id="essi-cc-118" name="cc[]"
109
+ value="118" onclick="essEnableCats(this)" >
110
+ <label for="essi-cc-118">Miljö &amp; hållbarhet</label>
111
+ </p>
112
+
113
+ <p>
114
+ <input type="checkbox" id="essi-cc-117" name="cc[]"
115
+ value="117" onclick="essEnableCats(this)" >
116
+ <label for="essi-cc-117">Omsorg, vård &amp; stöd</label>
117
+ </p>
118
+
119
+ <p>
120
+ <input type="checkbox" id="essi-cc-124" name="cc[]"
121
+ value="124" onclick="essEnableCats(this)" >
122
+ <label for="essi-cc-124">Bo &amp; bygga</label>
123
+ </p>
124
+
125
+ <p>
126
+ <input type="checkbox" id="essi-cc-122" name="cc[]"
127
+ value="122" onclick="essEnableCats(this)" >
128
+ <label for="essi-cc-122">Idrott &amp; fritid</label>
129
+ </p>
130
+
131
+ </div>
132
+ <div class="ess-cat-group-with-cats" id="essi-cg-vriga_delgrenar">
133
+ <p class="ess-cat-group">Övriga delgrenar:</p>
134
+
135
+ <p>
136
+ <input type="checkbox" id="essi-cc-114" name="cc[]"
137
+ value="114" onclick="essEnableCats(this)" >
138
+ <label for="essi-cc-114">Företagare</label>
139
+ </p>
140
+
141
+ <p>
142
+ <input type="checkbox" id="essi-cc-112" name="cc[]"
143
+ value="112" onclick="essEnableCats(this)" >
144
+ <label for="essi-cc-112">Kommun &amp; politik</label>
145
+ </p>
146
+
147
+ <p>
148
+ <input type="checkbox" id="essi-cc-113" name="cc[]"
149
+ value="113" onclick="essEnableCats(this)" >
150
+ <label for="essi-cc-113">Turist</label>
151
+ </p>
152
+
153
+ </div>
154
+ <div class="ess-cat-group-with-cats" id="essi-cg-kategori">
155
+ <p class="ess-cat-group">Kategori:</p>
156
+
157
+ <p>
158
+ <input type="checkbox" id="essi-cc-0" name="cc[]"
159
+ value="0" onclick="essEnableCats(this)" >
160
+ <label for="essi-cc-0">Övriga</label>
161
+ </p>
162
+
163
+ <p>
164
+ <input type="checkbox" id="essi-cc-110" name="cc[]"
165
+ value="110" onclick="essEnableCats(this)" >
166
+ <label for="essi-cc-110">Miljöbarometern</label>
167
+ </p>
168
+
169
+
170
+ </div>
171
+ </div>
172
+ <p class="ess-clear"></p>
173
+ </div>
174
+
175
+
176
+ <!-- FORMAT -->
177
+
178
+ <div class="ess-group" id="essi-opt-format">
179
+ <p><input type="radio" name="f" value="0" checked="checked" id="essi-filter-doctype-any" >
180
+ <strong><label for="essi-filter-doctype-any">S&ouml;k dokument av alla typer</label></strong></p>
181
+
182
+
183
+ <p><input type="radio" name="f" value="1" id="essi-filter-doctype" >
184
+ <label for="essi-filter-doctype">Endast detta format</label>:</p>
185
+ <p class="ess-option-block">
186
+ <select name="ff" onfocus="essEnableFilter('essi-filter-doctype')" title="Endast detta format"
187
+ id="essi-filter-doctype-sel" onclick="essEnableFilter('essi-filter-doctype')">
188
+ <option value="1">webbsidor</option>
189
+ <option value="3">PDF</option>
190
+ <option value="5">Word</option>
191
+ <option value="6">Excel</option>
192
+ <option value="7">PowerPoint</option>
193
+
194
+ </select>
195
+ </p>
196
+
197
+
198
+ <p><input type="radio" name="f" value="2" id="essi-filter-doctype-image" >
199
+ <label for="essi-filter-doctype-image">Bilder</label></p>
200
+
201
+ </div>
202
+
203
+
204
+ <!-- DATE -->
205
+
206
+ <div class="ess-group" id="essi-opt-date">
207
+ <p><input type="radio" name="da" value="0" checked="checked" id="essi-filter-date-any" >
208
+ <strong><label for="essi-filter-date-any">S&ouml;k dokument &auml;ndrade n&auml;r som helst</label></strong></p>
209
+ <p><input type="radio" name="da" value="1" id="essi-filter-date-week" >
210
+ <label for="essi-filter-date-week">Senaste veckan</label></p>
211
+ <p><input type="radio" name="da" value="2" id="essi-filter-date-month" >
212
+ <label for="essi-filter-date-month">Senaste m&aring;naden</label></p>
213
+ <p><input type="radio" name="da" value="3" id="essi-filter-date-year" >
214
+ <label for="essi-filter-date-year">Senaste &aring;ret</label>
215
+ <input type="hidden" name="d" value="1" >
216
+ </p>
217
+ </div>
218
+
219
+
220
+ <!-- LANGUAGE -->
221
+
222
+ <div class="ess-group" id="essi-opt-lang">
223
+ <p><input type="radio" name="l" value="0" checked="checked" id="essi-filter-lang-any" >
224
+ <strong><label for="essi-filter-lang-any">S&ouml;k dokument p&aring; alla spr&aring;k</label></strong></p>
225
+ <p><input type="radio" name="l" value="1" id="essi-filter-lang" >
226
+ <label for="essi-filter-lang">Endast p&aring;</label>:
227
+ </p>
228
+ <p class="ess-option-block">
229
+ <select name="ll" onfocus="essEnableFilter('essi-filter-lang')" title="Endast p&aring;"
230
+ id="essi-filter-lang-sel" onclick="essEnableFilter('essi-filter-lang')">
231
+ <option value="-1">alla språk</option>
232
+ <option value="5">svenska</option>
233
+ <option value="0">danska</option>
234
+ <option value="1">tyska</option>
235
+ <option value="2">engelska</option>
236
+ <option value="3">franska</option>
237
+ <option value="6">spanska</option>
238
+ <option value="11">ryska</option>
239
+ <option value="14">polska</option>
240
+ <option value="15">kroatiska</option>
241
+ <option value="17">turkiska</option>
242
+ <option value="21">rumänska</option>
243
+ <option value="40">albanska</option>
244
+
245
+ </select>
246
+ </p>
247
+ </div>
248
+
249
+
250
+ <!-- STEMMING -->
251
+
252
+
253
+ <div class="ess-group" id="essi-search-button-2-div">
254
+ <input type="submit" name="x" tabindex="2" id="essi-search-button-2"
255
+ value="Hitta!" >
256
+ </div>
257
+
258
+ </div>
259
+
260
+ </form>
261
+
262
+ </td>
263
+ <td valign="top">
264
+
265
+
266
+
267
+ <div class="ess-result">
268
+
269
+ <h2 class="ess-topcell">
270
+ <strong class="ess-header">Resultat:</strong>
271
+ <strong><span id="essi-hitcount">Inga</span></strong> <span id="essi-hitname">träffar</span> på <strong>barnomsrg</strong><span id="essi-wholesite-prep"> inom <strong id="essi-wholesite">Hela Malmö stads webbplats</strong></span>
272
+ </h2>
273
+
274
+
275
+
276
+
277
+ </div>
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+ <div class="ess-nohits">
287
+ <div class="ess-spelling">
288
+ <span class="ess-qmark">?</span>
289
+
290
+ <h3 class="ess-helpheadline">S&ouml;kfr&aring;gan kan vara felstavad</h3>
291
+
292
+ <p>Menade du <strong><strong>barnomsorg</strong></strong> eller <strong><strong>backgrounds</strong></strong>?</p>
293
+
294
+ <ul>
295
+
296
+ <li>
297
+ Jag vill s&ouml;ka efter
298
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=barnomsorg&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;sc=click&amp;p=&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d"><strong>barnomsorg</strong></a>.
299
+ </li>
300
+
301
+ <li>
302
+ Jag vill s&ouml;ka efter
303
+ <a href="http://foo.appliance.siteseeker.se/search/bar/?q=backgrounds&amp;t=simple&amp;ls=2&amp;d=0&amp;d1=01&amp;d2=1&amp;d3=1970&amp;d4=26&amp;d5=11&amp;d6=2013&amp;s=0&amp;so=1&amp;h=0&amp;hn=20&amp;hd=1&amp;i=sv&amp;sc=click&amp;p=&amp;b=1&amp;c=0&amp;t=s&amp;l=0&amp;ll=-2&amp;f=0&amp;ff=0&amp;oenc=UTF-8&amp;ua=f111c901872e2ee9f3fea7552d9dde4d"><strong>backgrounds</strong></a>.
304
+ </li>
305
+
306
+ </ul>
307
+
308
+ </div>
309
+ </div>
310
+
311
+
312
+
313
+
314
+
315
+ <dl class="ess-hits ess-hit">
316
+
317
+
318
+
319
+
320
+
321
+ </dl>
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+ <div id="essi-footer-logo">
331
+ <br >
332
+ <a href="http://www.siteseeker.se/">
333
+ <img src="http://foo.appliance.siteseeker.se/images/modular/poweredbysiteseeker.gif" alt="S&ouml;kning levererad av Euroling SiteSeeker"
334
+ style="border: none;" ></a>
335
+ </div>
336
+
337
+
338
+ <script type="text/javascript" src="http://foo.appliance.siteseeker.se/click/bar/?ua=f111c901872e2ee9f3fea7552d9dde4d&amp;pageloading=1"></script>
339
+
340
+ </td>
341
+ <td valign="top">
342
+
343
+ </td>
344
+ </tr>
345
+ </table>
346
+ </body>
347
+ </html>
@@ -0,0 +1,69 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe SiteseekerNormalizer do
5
+ before(:each) do
6
+ raw_results = open("spec/fixtures/barn.html").read
7
+ @results = SiteseekerNormalizer::Parse.new(raw_results, encoding: "UTF-8")
8
+ end
9
+
10
+ it "should have a number of hits" do
11
+ @results.total.should be_a Fixnum
12
+ end
13
+
14
+ it "should have results" do
15
+ @results.entries.count.should > 0
16
+ end
17
+
18
+ describe "result entry" do
19
+ it "should have an order number" do
20
+ @results.entries.first.number.should eq 1
21
+ end
22
+
23
+ it "should have a title" do
24
+ @results.entries.first.title.should be_a String
25
+ end
26
+
27
+ it "should have an extract" do
28
+ @results.entries.first.summary.should be_a String
29
+ end
30
+
31
+ it "should have a breadcrumb" do
32
+ @results.entries.first.breadcrumbs.should be_an Array
33
+ end
34
+
35
+ it "should have a category" do
36
+ @results.entries.first.category.should be_a String
37
+ end
38
+
39
+ it "should have a date string" do
40
+ @results.entries.first.date.should be_a String
41
+ end
42
+ end
43
+
44
+ it "should have sorting" do
45
+ @results.sorting.should be_an Array
46
+ end
47
+
48
+ it "should have a first sorting entry with text" do
49
+ @results.sorting.first.text.should be_a String
50
+ end
51
+
52
+ it "should have a second sorting entry with an url" do
53
+ @results.sorting[1].query.should be_a String
54
+ end
55
+
56
+ it "should have a query string for getting more results" do
57
+ @results.more_query.should be_a String
58
+ end
59
+
60
+ it "should have a categories" do
61
+ @results.category_groups.should be_an Array
62
+ end
63
+
64
+ it "should show a spelling suggestions" do
65
+ raw_results = open("spec/fixtures/barnomsrg.html").read
66
+ results = SiteseekerNormalizer::Parse.new(raw_results, encoding: "UTF-8")
67
+ results.suggestions.count.should > 0
68
+ end
69
+ end
@@ -0,0 +1,2 @@
1
+ require "siteseeker_normalizer"
2
+ require 'nokogiri'
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: siteseeker_normalizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - martent
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Siteseeker integration library
70
+ email:
71
+ - marten@thavenius.se
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - .rspec
78
+ - COPYING
79
+ - Gemfile
80
+ - README.md
81
+ - Rakefile
82
+ - lib/siteseeker_normalizer.rb
83
+ - lib/siteseeker_normalizer/client.rb
84
+ - lib/siteseeker_normalizer/parse.rb
85
+ - lib/siteseeker_normalizer/parse/category.rb
86
+ - lib/siteseeker_normalizer/parse/entry.rb
87
+ - lib/siteseeker_normalizer/version.rb
88
+ - siteseeker_normalizer.gemspec
89
+ - spec/fixtures/barn.html
90
+ - spec/fixtures/barnomsrg.html
91
+ - spec/siteseeeker_normalizer_spec.rb
92
+ - spec/spec_helper.rb
93
+ homepage: https://github.com/malmostad/siteseeker_normalizer
94
+ licenses:
95
+ - AGPL v3
96
+ metadata: {}
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 2.1.11
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: A Ruby Gem for making requests and parsing the response from Siteseeker to
117
+ a structured object.
118
+ test_files:
119
+ - spec/fixtures/barn.html
120
+ - spec/fixtures/barnomsrg.html
121
+ - spec/siteseeeker_normalizer_spec.rb
122
+ - spec/spec_helper.rb