iudex-core 1.0.0-java → 1.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/config/mojibake ADDED
@@ -0,0 +1,268 @@
1
+ # -*- coding: utf-8 -*- mojibake: 1.0.0
2
+ /Â[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ŒœŠšŸŽžƒˆ˜–—‘’‚“”„†‡•…‰‹›€™\uFFFD]|Ã[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ŒœŠšŸŽžƒˆ˜–—‘’‚“”„†‡•…‰‹›€™\uFFFD]|Å[\u0092\u0093\u00A0¡¸½¾’“]|Æ[\u0092’]|Ë[\u0086\u009Cœ†]|â(\u0080[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u0093\u0094\u0098\u0099\u009A\u009C\u009D\u009E\u00A0¡¢¦°¹º]|\u0081\u00A0|\u0082¬|\u0084¢|‚¬|„¢|€[\u0081\u009D\u00A0¡¢¦°¹ºœŠšžƒˆ˜‚“”„†‡…‰‹€™\uFFFD]|\uFFFD\u00A0)|ï(»¿|¿[½¾])/
3
+
4
+ Moji UNICODE Org CODE
5
+ +---- ---- ---- ---- ----- ---+
6
+ [€] 00C2 0080 [€] 0080
7
+ [] 00C2 0081 [] 0081
8
+ [‚] 00C2 0082 [‚] 0082
9
+ [ƒ] 00C2 0083 [ƒ] 0083
10
+ [„] 00C2 0084 [„] 0084
11
+ […] 00C2 0085 […] 0085
12
+ [†] 00C2 0086 [†] 0086
13
+ [‡] 00C2 0087 [‡] 0087
14
+ [ˆ] 00C2 0088 [ˆ] 0088
15
+ [‰] 00C2 0089 [‰] 0089
16
+ [Š] 00C2 008A [Š] 008A
17
+ [‹] 00C2 008B [‹] 008B
18
+ [Œ] 00C2 008C [Œ] 008C
19
+ [] 00C2 008D [] 008D
20
+ [Ž] 00C2 008E [Ž] 008E
21
+ [] 00C2 008F [] 008F
22
+ [] 00C2 0090 [] 0090
23
+ [‘] 00C2 0091 [‘] 0091
24
+ [’] 00C2 0092 [’] 0092
25
+ [“] 00C2 0093 [“] 0093
26
+ [”] 00C2 0094 [”] 0094
27
+ [•] 00C2 0095 [•] 0095
28
+ [–] 00C2 0096 [–] 0096
29
+ [—] 00C2 0097 [—] 0097
30
+ [˜] 00C2 0098 [˜] 0098
31
+ [™] 00C2 0099 [™] 0099
32
+ [š] 00C2 009A [š] 009A
33
+ [›] 00C2 009B [›] 009B
34
+ [œ] 00C2 009C [œ] 009C
35
+ [] 00C2 009D [] 009D
36
+ [ž] 00C2 009E [ž] 009E
37
+ [Ÿ] 00C2 009F [Ÿ] 009F
38
+ [ ] 00C2 00A0 [ ] 00A0
39
+ [¡] 00C2 00A1 [¡] 00A1
40
+ [¢] 00C2 00A2 [¢] 00A2
41
+ [£] 00C2 00A3 [£] 00A3
42
+ [¤] 00C2 00A4 [¤] 00A4
43
+ [Â¥] 00C2 00A5 [¥] 00A5
44
+ [¦] 00C2 00A6 [¦] 00A6
45
+ [§] 00C2 00A7 [§] 00A7
46
+ [¨] 00C2 00A8 [¨] 00A8
47
+ [©] 00C2 00A9 [©] 00A9
48
+ [ª] 00C2 00AA [ª] 00AA
49
+ [«] 00C2 00AB [«] 00AB
50
+ [¬] 00C2 00AC [¬] 00AC
51
+ [­] 00C2 00AD [­] 00AD
52
+ [®] 00C2 00AE [®] 00AE
53
+ [¯] 00C2 00AF [¯] 00AF
54
+ [°] 00C2 00B0 [°] 00B0
55
+ [±] 00C2 00B1 [±] 00B1
56
+ [²] 00C2 00B2 [²] 00B2
57
+ [³] 00C2 00B3 [³] 00B3
58
+ [´] 00C2 00B4 [´] 00B4
59
+ [µ] 00C2 00B5 [µ] 00B5
60
+ [¶] 00C2 00B6 [¶] 00B6
61
+ [·] 00C2 00B7 [·] 00B7
62
+ [¸] 00C2 00B8 [¸] 00B8
63
+ [¹] 00C2 00B9 [¹] 00B9
64
+ [º] 00C2 00BA [º] 00BA
65
+ [»] 00C2 00BB [»] 00BB
66
+ [¼] 00C2 00BC [¼] 00BC
67
+ [½] 00C2 00BD [½] 00BD
68
+ [¾] 00C2 00BE [¾] 00BE
69
+ [¿] 00C2 00BF [¿] 00BF
70
+ [ÂŒ] 00C2 0152 [Œ] 008C
71
+ [Âœ] 00C2 0153 [œ] 009C
72
+ [Š] 00C2 0160 [Š] 008A
73
+ [š] 00C2 0161 [š] 009A
74
+ [Ÿ] 00C2 0178 [Ÿ] 009F
75
+ [ÂŽ] 00C2 017D [Ž] 008E
76
+ [ž] 00C2 017E [ž] 009E
77
+ [ƒ] 00C2 0192 [ƒ] 0083
78
+ [ˆ] 00C2 02C6 [ˆ] 0088
79
+ [˜] 00C2 02DC [˜] 0098
80
+ [–] 00C2 2013 [–] 0096
81
+ [—] 00C2 2014 [—] 0097
82
+ [‘] 00C2 2018 [‘] 0091
83
+ [Â’] 00C2 2019 [’] 0092
84
+ [‚] 00C2 201A [‚] 0082
85
+ [“] 00C2 201C [“] 0093
86
+ [”] 00C2 201D [”] 0094
87
+ [„] 00C2 201E [„] 0084
88
+ [†] 00C2 2020 [†] 0086
89
+ [‡] 00C2 2021 [‡] 0087
90
+ [•] 00C2 2022 [•] 0095
91
+ [Â…] 00C2 2026 […] 0085
92
+ [‰] 00C2 2030 [‰] 0089
93
+ [‹] 00C2 2039 [‹] 008B
94
+ [›] 00C2 203A [›] 009B
95
+ [€] 00C2 20AC [€] 0080
96
+ [™] 00C2 2122 [™] 0099
97
+ [Â�] 00C2 FFFD [] 0081
98
+ [À] 00C3 0080 [À] 00C0
99
+ [Á] 00C3 0081 [Á] 00C1
100
+ [Â] 00C3 0082 [Â] 00C2
101
+ [Ã] 00C3 0083 [Ã] 00C3
102
+ [Ä] 00C3 0084 [Ä] 00C4
103
+ [Å] 00C3 0085 [Å] 00C5
104
+ [Æ] 00C3 0086 [Æ] 00C6
105
+ [Ç] 00C3 0087 [Ç] 00C7
106
+ [È] 00C3 0088 [È] 00C8
107
+ [É] 00C3 0089 [É] 00C9
108
+ [Ê] 00C3 008A [Ê] 00CA
109
+ [Ë] 00C3 008B [Ë] 00CB
110
+ [Ì] 00C3 008C [Ì] 00CC
111
+ [Í] 00C3 008D [Í] 00CD
112
+ [Î] 00C3 008E [Î] 00CE
113
+ [Ï] 00C3 008F [Ï] 00CF
114
+ [Ð] 00C3 0090 [Ð] 00D0
115
+ [Ñ] 00C3 0091 [Ñ] 00D1
116
+ [Ò] 00C3 0092 [Ò] 00D2
117
+ [Ó] 00C3 0093 [Ó] 00D3
118
+ [Ô] 00C3 0094 [Ô] 00D4
119
+ [Õ] 00C3 0095 [Õ] 00D5
120
+ [Ö] 00C3 0096 [Ö] 00D6
121
+ [×] 00C3 0097 [×] 00D7
122
+ [Ø] 00C3 0098 [Ø] 00D8
123
+ [Ù] 00C3 0099 [Ù] 00D9
124
+ [Ú] 00C3 009A [Ú] 00DA
125
+ [Û] 00C3 009B [Û] 00DB
126
+ [Ü] 00C3 009C [Ü] 00DC
127
+ [Ý] 00C3 009D [Ý] 00DD
128
+ [Þ] 00C3 009E [Þ] 00DE
129
+ [ß] 00C3 009F [ß] 00DF
130
+ [à] 00C3 00A0 [à] 00E0
131
+ [á] 00C3 00A1 [á] 00E1
132
+ [â] 00C3 00A2 [â] 00E2
133
+ [ã] 00C3 00A3 [ã] 00E3
134
+ [ä] 00C3 00A4 [ä] 00E4
135
+ [Ã¥] 00C3 00A5 [å] 00E5
136
+ [æ] 00C3 00A6 [æ] 00E6
137
+ [ç] 00C3 00A7 [ç] 00E7
138
+ [è] 00C3 00A8 [è] 00E8
139
+ [é] 00C3 00A9 [é] 00E9
140
+ [ê] 00C3 00AA [ê] 00EA
141
+ [ë] 00C3 00AB [ë] 00EB
142
+ [ì] 00C3 00AC [ì] 00EC
143
+ [í] 00C3 00AD [í] 00ED
144
+ [î] 00C3 00AE [î] 00EE
145
+ [ï] 00C3 00AF [ï] 00EF
146
+ [ð] 00C3 00B0 [ð] 00F0
147
+ [ñ] 00C3 00B1 [ñ] 00F1
148
+ [ò] 00C3 00B2 [ò] 00F2
149
+ [ó] 00C3 00B3 [ó] 00F3
150
+ [ô] 00C3 00B4 [ô] 00F4
151
+ [õ] 00C3 00B5 [õ] 00F5
152
+ [ö] 00C3 00B6 [ö] 00F6
153
+ [÷] 00C3 00B7 [÷] 00F7
154
+ [ø] 00C3 00B8 [ø] 00F8
155
+ [ù] 00C3 00B9 [ù] 00F9
156
+ [ú] 00C3 00BA [ú] 00FA
157
+ [û] 00C3 00BB [û] 00FB
158
+ [ü] 00C3 00BC [ü] 00FC
159
+ [ý] 00C3 00BD [ý] 00FD
160
+ [þ] 00C3 00BE [þ] 00FE
161
+ [ÿ] 00C3 00BF [ÿ] 00FF
162
+ [ÃŒ] 00C3 0152 [Ì] 00CC
163
+ [Ãœ] 00C3 0153 [Ü] 00DC
164
+ [Ê] 00C3 0160 [Ê] 00CA
165
+ [Ú] 00C3 0161 [Ú] 00DA
166
+ [ß] 00C3 0178 [ß] 00DF
167
+ [ÃŽ] 00C3 017D [Î] 00CE
168
+ [Þ] 00C3 017E [Þ] 00DE
169
+ [Ã] 00C3 0192 [Ã] 00C3
170
+ [È] 00C3 02C6 [È] 00C8
171
+ [Ø] 00C3 02DC [Ø] 00D8
172
+ [Ö] 00C3 2013 [Ö] 00D6
173
+ [×] 00C3 2014 [×] 00D7
174
+ [Ñ] 00C3 2018 [Ñ] 00D1
175
+ [Ã’] 00C3 2019 [Ò] 00D2
176
+ [Â] 00C3 201A [Â] 00C2
177
+ [Ó] 00C3 201C [Ó] 00D3
178
+ [Ô] 00C3 201D [Ô] 00D4
179
+ [Ä] 00C3 201E [Ä] 00C4
180
+ [Æ] 00C3 2020 [Æ] 00C6
181
+ [Ç] 00C3 2021 [Ç] 00C7
182
+ [Õ] 00C3 2022 [Õ] 00D5
183
+ [Ã…] 00C3 2026 [Å] 00C5
184
+ [É] 00C3 2030 [É] 00C9
185
+ [Ë] 00C3 2039 [Ë] 00CB
186
+ [Û] 00C3 203A [Û] 00DB
187
+ [À] 00C3 20AC [À] 00C0
188
+ [Ù] 00C3 2122 [Ù] 00D9
189
+ [Ã�] 00C3 FFFD [Á] 00C1
190
+ [Œ] 00C5 0092 [Œ] 0152
191
+ [œ] 00C5 0093 [œ] 0153
192
+ [Å ] 00C5 00A0 [Š] 0160
193
+ [Å¡] 00C5 00A1 [š] 0161
194
+ [Ÿ] 00C5 00B8 [Ÿ] 0178
195
+ [Ž] 00C5 00BD [Ž] 017D
196
+ [ž] 00C5 00BE [ž] 017E
197
+ [Å’] 00C5 2019 [Œ] 0152
198
+ [Å“] 00C5 201C [œ] 0153
199
+ [ƒ] 00C6 0092 [ƒ] 0192
200
+ [Æ’] 00C6 2019 [ƒ] 0192
201
+ [ˆ] 00CB 0086 [ˆ] 02C6
202
+ [˜] 00CB 009C [˜] 02DC
203
+ [Ëœ] 00CB 0153 [˜] 02DC
204
+ [ˆ] 00CB 2020 [ˆ] 02C6
205
+ [ ] 00E2 0080 0080 [ ] 2000
206
+ [ ] 00E2 0080 0081 [ ] 2001
207
+ [ ] 00E2 0080 0082 [ ] 2002
208
+ [ ] 00E2 0080 0083 [ ] 2003
209
+ [ ] 00E2 0080 0084 [ ] 2004
210
+ [ ] 00E2 0080 0085 [ ] 2005
211
+ [ ] 00E2 0080 0086 [ ] 2006
212
+ [ ] 00E2 0080 0087 [ ] 2007
213
+ [ ] 00E2 0080 0088 [ ] 2008
214
+ [ ] 00E2 0080 0089 [ ] 2009
215
+ [ ] 00E2 0080 008A [ ] 200A
216
+ [​] 00E2 0080 008B [​] 200B
217
+ [–] 00E2 0080 0093 [–] 2013
218
+ [—] 00E2 0080 0094 [—] 2014
219
+ [‘] 00E2 0080 0098 [‘] 2018
220
+ [’] 00E2 0080 0099 [’] 2019
221
+ [‚] 00E2 0080 009A [‚] 201A
222
+ [“] 00E2 0080 009C [“] 201C
223
+ [”] 00E2 0080 009D [”] 201D
224
+ [„] 00E2 0080 009E [„] 201E
225
+ [†] 00E2 0080 00A0 [†] 2020
226
+ [‡] 00E2 0080 00A1 [‡] 2021
227
+ [•] 00E2 0080 00A2 [•] 2022
228
+ […] 00E2 0080 00A6 […] 2026
229
+ [‰] 00E2 0080 00B0 [‰] 2030
230
+ [‹] 00E2 0080 00B9 [‹] 2039
231
+ [›] 00E2 0080 00BA [›] 203A
232
+ [⁠] 00E2 0081 00A0 [⁠] 2060
233
+ [€] 00E2 0082 00AC [€] 20AC
234
+ [™] 00E2 0084 00A2 [™] 2122
235
+ [€] 00E2 201A 00AC [€] 20AC
236
+ [â„¢] 00E2 201E 00A2 [™] 2122
237
+ [ ] 00E2 20AC 0081 [ ] 2001
238
+ [”] 00E2 20AC 009D [”] 201D
239
+ [†] 00E2 20AC 00A0 [†] 2020
240
+ [‡] 00E2 20AC 00A1 [‡] 2021
241
+ [•] 00E2 20AC 00A2 [•] 2022
242
+ […] 00E2 20AC 00A6 […] 2026
243
+ [‰] 00E2 20AC 00B0 [‰] 2030
244
+ [‹] 00E2 20AC 00B9 [‹] 2039
245
+ [›] 00E2 20AC 00BA [›] 203A
246
+ [“] 00E2 20AC 0153 [“] 201C
247
+ [ ] 00E2 20AC 0160 [ ] 200A
248
+ [‚] 00E2 20AC 0161 [‚] 201A
249
+ [„] 00E2 20AC 017E [„] 201E
250
+ [ ] 00E2 20AC 0192 [ ] 2003
251
+ [ ] 00E2 20AC 02C6 [ ] 2008
252
+ [‘] 00E2 20AC 02DC [‘] 2018
253
+ [ ] 00E2 20AC 201A [ ] 2002
254
+ [–] 00E2 20AC 201C [–] 2013
255
+ [—] 00E2 20AC 201D [—] 2014
256
+ [ ] 00E2 20AC 201E [ ] 2004
257
+ [ ] 00E2 20AC 2020 [ ] 2006
258
+ [ ] 00E2 20AC 2021 [ ] 2007
259
+ [ ] 00E2 20AC 2026 [ ] 2005
260
+ [ ] 00E2 20AC 2030 [ ] 2009
261
+ [​] 00E2 20AC 2039 [​] 200B
262
+ [ ] 00E2 20AC 20AC [ ] 2000
263
+ [’] 00E2 20AC 2122 [’] 2019
264
+ [â€�] 00E2 20AC FFFD [”] 201D
265
+ [â� ] 00E2 FFFD 00A0 [⁠] 2060
266
+ [] 00EF 00BB 00BF [] FEFF
267
+ [�] 00EF 00BF 00BD [�] FFFD
268
+ [￾] 00EF 00BF 00BE [￾] FFFE
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module Core
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
Binary file
@@ -0,0 +1,73 @@
1
+ #--
2
+ # Copyright (c) 2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+ require 'java'
19
+
20
+ module Iudex::Core
21
+
22
+ module MojiBake
23
+ DEFAULT_CONFIG = File.join( File.dirname( __FILE__ ),
24
+ '..', '..', 'config', 'mojibake' )
25
+
26
+ def self.load_config( file = DEFAULT_CONFIG ) # Parse a mojibake config file; returns [ regex source (String or nil), java.util.HashMap ]
27
+ regex = nil
28
+ mojis = []
29
+ File.open( file ) do |fin|
30
+ fin.each do |line|
31
+ case line
32
+ when %r{^/([^/]+)/$} # a whole line of the form /.../ containing no inner slash
33
+ regex = $1 # keep only the text between the slashes
34
+ when /^\[.*?\]\s+([0-9A-F ]+)\s+\[.*\]\s+([0-9A-F]+)$/ # table row: [moji] HEX HEX.. [orig] HEX
35
+ mojis << [ $1.split( ' ' ), $2 ] # [ Array of hex code strings, single hex code string ]
36
+ end # lines matching neither pattern are ignored
37
+ end
38
+ end
39
+
40
+ mh = Java::java.util.HashMap.new( 512 )
41
+ mojis.each do | moji, rpl |
42
+ mh.put( jstring( moji ), jstring( rpl ) ) # key and value are both converted to java.lang.String
43
+ end
44
+ [ regex, mh ]
45
+ end
46
+
47
+ private
48
+
49
+ def self.jstring( cps )
50
+ cs = cps.map { |cp| cp.hex }.to_java( :char )
51
+ Java::java.lang.String.new( cs )
52
+ end
53
+
54
+ end
55
+
56
+ module Filters
57
+ import 'iudex.core.filters.MojiBakeFilter'
58
+
59
+ # Re-open iudex.core.filters.MojiBakeFilter to add config file
60
+ # based initialization.
61
+ class MojiBakeFilter
62
+
63
+ # Alt constructor taking a configuration file in `mojibake -t`
64
+ # format.
65
+ def initialize( key, config_file = MojiBake::DEFAULT_CONFIG )
66
+ super( key, *MojiBake.load_config( config_file ) )
67
+ end
68
+
69
+ end
70
+
71
+ end
72
+
73
+ end
data/lib/iudex-core.rb CHANGED
@@ -1,3 +1,4 @@
1
+
1
2
  #--
2
3
  # Copyright (c) 2008-2011 David Kellum
3
4
  #
@@ -31,7 +32,9 @@ module Iudex
31
32
 
32
33
  import 'iudex.core.ContentKeys'
33
34
  import 'iudex.core.ContentSource'
34
- import 'iudex.core.VisitExecutor'
35
+ import 'iudex.core.VisitManager'
36
+ import 'iudex.core.VisitQueueFactory'
37
+ import 'iudex.core.VisitQueue'
35
38
  import 'iudex.core.VisitURL'
36
39
 
37
40
  module Filters
@@ -40,10 +43,13 @@ module Iudex
40
43
  import 'iudex.core.filters.DateChangeFilter'
41
44
  import 'iudex.core.filters.DefaultFilter'
42
45
  import 'iudex.core.filters.FutureDateFilter'
43
- import 'iudex.core.filters.RLDomainFilter'
46
+ import 'iudex.core.filters.RedirectHandler'
47
+ import 'iudex.core.filters.Revisitor'
44
48
  import 'iudex.core.filters.TextCtrlWSFilter'
45
49
  import 'iudex.core.filters.UHashMDCSetter'
46
50
  end
47
51
 
48
52
  end
49
53
  end
54
+
55
+ require 'iudex-core/mojibake'
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-core</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
8
+ <version>1.1.0</version>
9
9
  <name>Iudex Core System</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -30,19 +30,19 @@
30
30
  <dependency>
31
31
  <groupId>iudex</groupId>
32
32
  <artifactId>iudex-filter</artifactId>
33
- <version>[1.0,1.1)</version>
33
+ <version>[1.1,1.2)</version>
34
34
  </dependency>
35
35
 
36
36
  <dependency>
37
37
  <groupId>iudex</groupId>
38
38
  <artifactId>iudex-http</artifactId>
39
- <version>[1.0,1.1)</version>
39
+ <version>[1.1,1.2)</version>
40
40
  </dependency>
41
41
 
42
42
  <dependency>
43
43
  <groupId>iudex</groupId>
44
44
  <artifactId>iudex-barc</artifactId>
45
- <version>[1.0,1.1)</version>
45
+ <version>[1.1,1.2)</version>
46
46
  </dependency>
47
47
 
48
48
  <dependency>
@@ -37,10 +37,16 @@ module TestHTTPMocks
37
37
  WEAK_ETAG = 'W/"weak-etag"'
38
38
 
39
39
  class MockSession < Iudex::HTTP::HTTPSession
40
- import 'com.gravitext.util.ByteBufferInputStream'
41
40
  import 'java.nio.ByteBuffer'
42
41
  include Iudex::HTTP
43
42
 
43
+ attr_writer :status
44
+
45
+ def initialize
46
+ super()
47
+ @status = 200
48
+ end
49
+
44
50
  def requestHeaders
45
51
  [ ]
46
52
  end
@@ -49,12 +55,12 @@ module TestHTTPMocks
49
55
  [ Header.new( "ETag", WEAK_ETAG ) ]
50
56
  end
51
57
 
52
- def responseCode
53
- 200
58
+ def statusCode
59
+ @status
54
60
  end
55
61
 
56
- def responseStream
57
- ByteBufferInputStream.new( ByteBuffer::wrap( "".to_java_bytes ) )
62
+ def responseBody
63
+ ByteBuffer::wrap( "BODY".to_java_bytes )
58
64
  end
59
65
 
60
66
  def statusText
@@ -62,7 +68,10 @@ module TestHTTPMocks
62
68
  end
63
69
 
64
70
  def execute( handler )
65
- handler.handle_success( self )
71
+ handler.session_completed( self )
72
+ end
73
+
74
+ def close
66
75
  end
67
76
  end
68
77
 
@@ -88,6 +97,20 @@ module TestHTTPMocks
88
97
  end
89
98
  end
90
99
 
100
+ import 'iudex.core.VisitCounter'
101
+
102
+ class TestVisitCounter
103
+ include VisitCounter
104
+ attr_reader :released
105
+
106
+ def add( order )
107
+ end
108
+
109
+ def release( acquired, newOrder )
110
+ @released = acquired.url
111
+ end
112
+ end
113
+
91
114
  end
92
115
 
93
116
  class TestContentFetcher < MiniTest::Unit::TestCase
@@ -119,54 +142,27 @@ class TestContentFetcher < MiniTest::Unit::TestCase
119
142
  def test_304
120
143
  client = MockHTTPClient.new
121
144
  def client.request( session, handler )
122
- handler.handle_error( session, 304 )
145
+ session.status = 304
146
+ handler.session_completed( session )
123
147
  end
124
148
  fetch( create_content, client ) do |out|
125
149
  assert_equal( DEFAULT_URL, out.url.to_s )
126
150
  assert_equal( 304, out.status )
127
- assert_nil( out.etag )
128
- assert_nil( out.source )
129
- end
130
- end
131
-
132
- REDIRECT_URL = "http://gravitext.com/redirect#foo"
133
- REDIRECT_NORM = "http://gravitext.com/redirect"
134
-
135
- def test_redirect
136
- client = MockHTTPClient.new
137
- def client.create_session
138
- s = MockSession.new
139
- def s.execute( handler )
140
- self.url = REDIRECT_URL
141
- super
142
- end
143
- s
144
- end
145
- fetch( create_content, client ) do |out|
146
- assert_equal( REDIRECT_NORM, out.url.to_s )
147
- assert_equal( 200, out.status )
148
-
149
- ref = out.referer
150
-
151
- assert_equal( DEFAULT_URL, ref.url.to_s )
152
- assert_equal( 302, ref.status )
153
- assert_equal( REDIRECT_NORM, ref.referent.url.to_s )
154
151
  end
155
152
  end
156
153
 
157
154
  import "java.net.UnknownHostException"
158
- import "java.io.IOException"
159
155
 
160
156
  def test_connect_error
161
157
  client = MockHTTPClient.new
162
158
  def client.create_session
163
159
  s = MockSession.new
164
160
  def s.execute( handler )
165
- handler.handle_exception( self,
166
- UnknownHostException.new( "foobar.com" ) )
161
+ self.error = UnknownHostException.new( "foobar.com" )
162
+ handler.session_completed( self )
167
163
  end
168
- def s.responseCode
169
- nil
164
+ def s.statusCode
165
+ -1
170
166
  end
171
167
  def s.responseHeaders
172
168
  nil
@@ -182,7 +178,9 @@ class TestContentFetcher < MiniTest::Unit::TestCase
182
178
 
183
179
  def fetch( content, client = MockHTTPClient.new, &block )
184
180
  rec = TestReceiver.new( &block )
181
+ counter = TestVisitCounter.new
185
182
  cf = ContentFetcher.new( client,
183
+ counter,
186
184
  FilterChain.new( "test-rec", [ rec ] ) )
187
185
  cf.filter( content )
188
186
  end
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+ require 'iudex-core'
22
+
23
+ class TestContentSource < MiniTest::Unit::TestCase
24
+ include Iudex::Core
25
+
26
+ import 'java.nio.ByteBuffer'
27
+ import 'java.nio.charset.Charset'
28
+
29
+ def self.charset( name )
30
+ Charset::lookup( name )
31
+ end
32
+
33
+ UTF8 = charset( "UTF-8" )
34
+ ISO1 = charset( "ISO-8859-1" )
35
+
36
+ def setup
37
+ @cs = ContentSource.new( ByteBuffer::wrap( "any".to_java_bytes ) )
38
+ end
39
+
40
+ def test_default_encoding
41
+ refute( @cs.default_encoding )
42
+ end
43
+
44
+ def test_default_encoding
45
+ assert( @cs.set_default_encoding( UTF8, 0.0 ) )
46
+ assert_equal( UTF8, @cs.default_encoding )
47
+ assert_in_epsilon( 0.0, @cs.encoding_confidence )
48
+ end
49
+
50
+ def test_default_encoding_additive
51
+ 2.times { assert( @cs.set_default_encoding( UTF8, 0.10 ) ) }
52
+ assert_equal( UTF8, @cs.default_encoding )
53
+ assert_in_epsilon( 0.20, @cs.encoding_confidence )
54
+ end
55
+
56
+ def test_default_encoding_map
57
+ assert( @cs.set_default_encoding( { UTF8 => f( 0.10 ),
58
+ ISO1 => f( 0.20 ) } ) )
59
+ assert_equal( ISO1, @cs.default_encoding )
60
+ assert_in_epsilon( 0.20, @cs.encoding_confidence )
61
+
62
+ refute( @cs.set_default_encoding( {} ) )
63
+ refute( @cs.set_default_encoding( { UTF8 => f( 0.05 ) } ) )
64
+ assert( @cs.set_default_encoding( { UTF8 => f( 0.07 ),
65
+ ISO1 => f( 0.01 ) } ) )
66
+
67
+ assert_equal( UTF8, @cs.default_encoding )
68
+ assert_in_epsilon( 0.22, @cs.encoding_confidence )
69
+ end
70
+
71
+ def f( v )
72
+ Java::java.lang.Float.new( v )
73
+ end
74
+
75
+ end
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env jruby
2
+ # -*- coding: utf-8 -*-
3
+ #.hashdot.profile += jruby-shortlived
4
+
5
+ #--
6
+ # Copyright (c) 2011 David Kellum
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
9
+ # may not use this file except in compliance with the License. You
10
+ # may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
17
+ # implied. See the License for the specific language governing
18
+ # permissions and limitations under the License.
19
+ #++
20
+
21
+ require File.join( File.dirname( __FILE__ ), "setup" )
22
+ require 'iudex-core/mojibake'
23
+
24
+ class TestMojiBake < MiniTest::Unit::TestCase
25
+ include Gravitext::HTMap
26
+ include Iudex::Core
27
+ include Iudex::Core::Filters
28
+
29
+ UniMap.define_accessors
30
+
31
+ FILTER = MojiBakeFilter.new( ContentKeys::SUMMARY )
32
+
33
+ def test_nomatch_recover
34
+ assert_filter( '', '' )
35
+ assert_filter( 'ascii', 'ascii' )
36
+ assert_filter( 'Â', 'Â' )
37
+ end
38
+
39
+ def test_simple_recover
40
+ assert_filter( '[°]', '[°]' )
41
+ assert_filter( '“quoted”', '“quotedâ€�' )
42
+ assert_filter( '“quoted”', '“quoted”' )
43
+ end
44
+
45
+ def test_recursive_recover
46
+ assert_filter( '°', '°' )
47
+ assert_filter( 'AP – Greenlake', 'AP – Greenlake' )
48
+ assert_filter( 'you’re', 'you’re' )
49
+ end
50
+
51
+ def assert_filter( output, input )
52
+ map = UniMap.new
53
+ map.summary = input
54
+ assert( FILTER.filter( map ) )
55
+ assert_equal( output, map.summary.to_s, "From: #{input}" )
56
+ end
57
+
58
+ end