iudex-core 1.0.0-java → 1.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/config/mojibake ADDED
@@ -0,0 +1,268 @@
1
+ # -*- coding: utf-8 -*- mojibake: 1.0.0
2
+ /Â[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ŒœŠšŸŽžƒˆ˜–—‘’‚“”„†‡•…‰‹›€™\uFFFD]|Ã[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ŒœŠšŸŽžƒˆ˜–—‘’‚“”„†‡•…‰‹›€™\uFFFD]|Å[\u0092\u0093\u00A0¡¸½¾’“]|Æ[\u0092’]|Ë[\u0086\u009Cœ†]|â(\u0080[\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u0093\u0094\u0098\u0099\u009A\u009C\u009D\u009E\u00A0¡¢¦°¹º]|\u0081\u00A0|\u0082¬|\u0084¢|‚¬|„¢|€[\u0081\u009D\u00A0¡¢¦°¹ºœŠšžƒˆ˜‚“”„†‡…‰‹€™\uFFFD]|\uFFFD\u00A0)|ï(»¿|¿[½¾])/
3
+
4
+ Moji UNICODE Org CODE
5
+ +---- ---- ---- ---- ----- ---+
6
+ [€] 00C2 0080 [€] 0080
7
+ [] 00C2 0081 [] 0081
8
+ [‚] 00C2 0082 [‚] 0082
9
+ [ƒ] 00C2 0083 [ƒ] 0083
10
+ [„] 00C2 0084 [„] 0084
11
+ […] 00C2 0085 […] 0085
12
+ [†] 00C2 0086 [†] 0086
13
+ [‡] 00C2 0087 [‡] 0087
14
+ [ˆ] 00C2 0088 [ˆ] 0088
15
+ [‰] 00C2 0089 [‰] 0089
16
+ [Š] 00C2 008A [Š] 008A
17
+ [‹] 00C2 008B [‹] 008B
18
+ [Œ] 00C2 008C [Œ] 008C
19
+ [] 00C2 008D [] 008D
20
+ [Ž] 00C2 008E [Ž] 008E
21
+ [] 00C2 008F [] 008F
22
+ [] 00C2 0090 [] 0090
23
+ [‘] 00C2 0091 [‘] 0091
24
+ [’] 00C2 0092 [’] 0092
25
+ [“] 00C2 0093 [“] 0093
26
+ [”] 00C2 0094 [”] 0094
27
+ [•] 00C2 0095 [•] 0095
28
+ [–] 00C2 0096 [–] 0096
29
+ [—] 00C2 0097 [—] 0097
30
+ [˜] 00C2 0098 [˜] 0098
31
+ [™] 00C2 0099 [™] 0099
32
+ [š] 00C2 009A [š] 009A
33
+ [›] 00C2 009B [›] 009B
34
+ [œ] 00C2 009C [œ] 009C
35
+ [] 00C2 009D [] 009D
36
+ [ž] 00C2 009E [ž] 009E
37
+ [Ÿ] 00C2 009F [Ÿ] 009F
38
+ [ ] 00C2 00A0 [ ] 00A0
39
+ [¡] 00C2 00A1 [¡] 00A1
40
+ [¢] 00C2 00A2 [¢] 00A2
41
+ [£] 00C2 00A3 [£] 00A3
42
+ [¤] 00C2 00A4 [¤] 00A4
43
+ [Â¥] 00C2 00A5 [¥] 00A5
44
+ [¦] 00C2 00A6 [¦] 00A6
45
+ [§] 00C2 00A7 [§] 00A7
46
+ [¨] 00C2 00A8 [¨] 00A8
47
+ [©] 00C2 00A9 [©] 00A9
48
+ [ª] 00C2 00AA [ª] 00AA
49
+ [«] 00C2 00AB [«] 00AB
50
+ [¬] 00C2 00AC [¬] 00AC
51
+ [­] 00C2 00AD [­] 00AD
52
+ [®] 00C2 00AE [®] 00AE
53
+ [¯] 00C2 00AF [¯] 00AF
54
+ [°] 00C2 00B0 [°] 00B0
55
+ [±] 00C2 00B1 [±] 00B1
56
+ [²] 00C2 00B2 [²] 00B2
57
+ [³] 00C2 00B3 [³] 00B3
58
+ [´] 00C2 00B4 [´] 00B4
59
+ [µ] 00C2 00B5 [µ] 00B5
60
+ [¶] 00C2 00B6 [¶] 00B6
61
+ [·] 00C2 00B7 [·] 00B7
62
+ [¸] 00C2 00B8 [¸] 00B8
63
+ [¹] 00C2 00B9 [¹] 00B9
64
+ [º] 00C2 00BA [º] 00BA
65
+ [»] 00C2 00BB [»] 00BB
66
+ [¼] 00C2 00BC [¼] 00BC
67
+ [½] 00C2 00BD [½] 00BD
68
+ [¾] 00C2 00BE [¾] 00BE
69
+ [¿] 00C2 00BF [¿] 00BF
70
+ [ÂŒ] 00C2 0152 [Œ] 008C
71
+ [œ] 00C2 0153 [œ] 009C
72
+ [Š] 00C2 0160 [Š] 008A
73
+ [š] 00C2 0161 [š] 009A
74
+ [Ÿ] 00C2 0178 [Ÿ] 009F
75
+ [ÂŽ] 00C2 017D [Ž] 008E
76
+ [ž] 00C2 017E [ž] 009E
77
+ [ƒ] 00C2 0192 [ƒ] 0083
78
+ [ˆ] 00C2 02C6 [ˆ] 0088
79
+ [˜] 00C2 02DC [˜] 0098
80
+ [–] 00C2 2013 [–] 0096
81
+ [—] 00C2 2014 [—] 0097
82
+ [‘] 00C2 2018 [‘] 0091
83
+ [Â’] 00C2 2019 [’] 0092
84
+ [‚] 00C2 201A [‚] 0082
85
+ [“] 00C2 201C [“] 0093
86
+ [”] 00C2 201D [”] 0094
87
+ [„] 00C2 201E [„] 0084
88
+ [†] 00C2 2020 [†] 0086
89
+ [‡] 00C2 2021 [‡] 0087
90
+ [•] 00C2 2022 [•] 0095
91
+ [Â…] 00C2 2026 […] 0085
92
+ [‰] 00C2 2030 [‰] 0089
93
+ [‹] 00C2 2039 [‹] 008B
94
+ [›] 00C2 203A [›] 009B
95
+ [€] 00C2 20AC [€] 0080
96
+ [™] 00C2 2122 [™] 0099
97
+ [Â�] 00C2 FFFD [] 0081
98
+ [À] 00C3 0080 [À] 00C0
99
+ [Á] 00C3 0081 [Á] 00C1
100
+ [Â] 00C3 0082 [Â] 00C2
101
+ [Ã] 00C3 0083 [Ã] 00C3
102
+ [Ä] 00C3 0084 [Ä] 00C4
103
+ [Å] 00C3 0085 [Å] 00C5
104
+ [Æ] 00C3 0086 [Æ] 00C6
105
+ [Ç] 00C3 0087 [Ç] 00C7
106
+ [È] 00C3 0088 [È] 00C8
107
+ [É] 00C3 0089 [É] 00C9
108
+ [Ê] 00C3 008A [Ê] 00CA
109
+ [Ë] 00C3 008B [Ë] 00CB
110
+ [Ì] 00C3 008C [Ì] 00CC
111
+ [Í] 00C3 008D [Í] 00CD
112
+ [Î] 00C3 008E [Î] 00CE
113
+ [Ï] 00C3 008F [Ï] 00CF
114
+ [Ð] 00C3 0090 [Ð] 00D0
115
+ [Ñ] 00C3 0091 [Ñ] 00D1
116
+ [Ò] 00C3 0092 [Ò] 00D2
117
+ [Ó] 00C3 0093 [Ó] 00D3
118
+ [Ô] 00C3 0094 [Ô] 00D4
119
+ [Õ] 00C3 0095 [Õ] 00D5
120
+ [Ö] 00C3 0096 [Ö] 00D6
121
+ [×] 00C3 0097 [×] 00D7
122
+ [Ø] 00C3 0098 [Ø] 00D8
123
+ [Ù] 00C3 0099 [Ù] 00D9
124
+ [Ú] 00C3 009A [Ú] 00DA
125
+ [Û] 00C3 009B [Û] 00DB
126
+ [Ü] 00C3 009C [Ü] 00DC
127
+ [Ý] 00C3 009D [Ý] 00DD
128
+ [Þ] 00C3 009E [Þ] 00DE
129
+ [ß] 00C3 009F [ß] 00DF
130
+ [à] 00C3 00A0 [à] 00E0
131
+ [á] 00C3 00A1 [á] 00E1
132
+ [â] 00C3 00A2 [â] 00E2
133
+ [ã] 00C3 00A3 [ã] 00E3
134
+ [ä] 00C3 00A4 [ä] 00E4
135
+ [Ã¥] 00C3 00A5 [å] 00E5
136
+ [æ] 00C3 00A6 [æ] 00E6
137
+ [ç] 00C3 00A7 [ç] 00E7
138
+ [è] 00C3 00A8 [è] 00E8
139
+ [é] 00C3 00A9 [é] 00E9
140
+ [ê] 00C3 00AA [ê] 00EA
141
+ [ë] 00C3 00AB [ë] 00EB
142
+ [ì] 00C3 00AC [ì] 00EC
143
+ [í] 00C3 00AD [í] 00ED
144
+ [î] 00C3 00AE [î] 00EE
145
+ [ï] 00C3 00AF [ï] 00EF
146
+ [ð] 00C3 00B0 [ð] 00F0
147
+ [ñ] 00C3 00B1 [ñ] 00F1
148
+ [ò] 00C3 00B2 [ò] 00F2
149
+ [ó] 00C3 00B3 [ó] 00F3
150
+ [ô] 00C3 00B4 [ô] 00F4
151
+ [õ] 00C3 00B5 [õ] 00F5
152
+ [ö] 00C3 00B6 [ö] 00F6
153
+ [÷] 00C3 00B7 [÷] 00F7
154
+ [ø] 00C3 00B8 [ø] 00F8
155
+ [ù] 00C3 00B9 [ù] 00F9
156
+ [ú] 00C3 00BA [ú] 00FA
157
+ [û] 00C3 00BB [û] 00FB
158
+ [ü] 00C3 00BC [ü] 00FC
159
+ [ý] 00C3 00BD [ý] 00FD
160
+ [þ] 00C3 00BE [þ] 00FE
161
+ [ÿ] 00C3 00BF [ÿ] 00FF
162
+ [ÃŒ] 00C3 0152 [Ì] 00CC
163
+ [Ü] 00C3 0153 [Ü] 00DC
164
+ [Ê] 00C3 0160 [Ê] 00CA
165
+ [Ú] 00C3 0161 [Ú] 00DA
166
+ [ß] 00C3 0178 [ß] 00DF
167
+ [ÃŽ] 00C3 017D [Î] 00CE
168
+ [Þ] 00C3 017E [Þ] 00DE
169
+ [Ã] 00C3 0192 [Ã] 00C3
170
+ [È] 00C3 02C6 [È] 00C8
171
+ [Ø] 00C3 02DC [Ø] 00D8
172
+ [Ö] 00C3 2013 [Ö] 00D6
173
+ [×] 00C3 2014 [×] 00D7
174
+ [Ñ] 00C3 2018 [Ñ] 00D1
175
+ [Ã’] 00C3 2019 [Ò] 00D2
176
+ [Â] 00C3 201A [Â] 00C2
177
+ [Ó] 00C3 201C [Ó] 00D3
178
+ [Ô] 00C3 201D [Ô] 00D4
179
+ [Ä] 00C3 201E [Ä] 00C4
180
+ [Æ] 00C3 2020 [Æ] 00C6
181
+ [Ç] 00C3 2021 [Ç] 00C7
182
+ [Õ] 00C3 2022 [Õ] 00D5
183
+ [Ã…] 00C3 2026 [Å] 00C5
184
+ [É] 00C3 2030 [É] 00C9
185
+ [Ë] 00C3 2039 [Ë] 00CB
186
+ [Û] 00C3 203A [Û] 00DB
187
+ [À] 00C3 20AC [À] 00C0
188
+ [Ù] 00C3 2122 [Ù] 00D9
189
+ [Ã�] 00C3 FFFD [Á] 00C1
190
+ [Œ] 00C5 0092 [Œ] 0152
191
+ [œ] 00C5 0093 [œ] 0153
192
+ [Å ] 00C5 00A0 [Š] 0160
193
+ [Å¡] 00C5 00A1 [š] 0161
194
+ [Ÿ] 00C5 00B8 [Ÿ] 0178
195
+ [Ž] 00C5 00BD [Ž] 017D
196
+ [ž] 00C5 00BE [ž] 017E
197
+ [Å’] 00C5 2019 [Œ] 0152
198
+ [Å“] 00C5 201C [œ] 0153
199
+ [ƒ] 00C6 0092 [ƒ] 0192
200
+ [Æ’] 00C6 2019 [ƒ] 0192
201
+ [ˆ] 00CB 0086 [ˆ] 02C6
202
+ [˜] 00CB 009C [˜] 02DC
203
+ [Ëœ] 00CB 0153 [˜] 02DC
204
+ [ˆ] 00CB 2020 [ˆ] 02C6
205
+ [ ] 00E2 0080 0080 [ ] 2000
206
+ [ ] 00E2 0080 0081 [ ] 2001
207
+ [ ] 00E2 0080 0082 [ ] 2002
208
+ [ ] 00E2 0080 0083 [ ] 2003
209
+ [ ] 00E2 0080 0084 [ ] 2004
210
+ [ ] 00E2 0080 0085 [ ] 2005
211
+ [ ] 00E2 0080 0086 [ ] 2006
212
+ [ ] 00E2 0080 0087 [ ] 2007
213
+ [ ] 00E2 0080 0088 [ ] 2008
214
+ [ ] 00E2 0080 0089 [ ] 2009
215
+ [ ] 00E2 0080 008A [ ] 200A
216
+ [​] 00E2 0080 008B [​] 200B
217
+ [–] 00E2 0080 0093 [–] 2013
218
+ [—] 00E2 0080 0094 [—] 2014
219
+ [‘] 00E2 0080 0098 [‘] 2018
220
+ [’] 00E2 0080 0099 [’] 2019
221
+ [‚] 00E2 0080 009A [‚] 201A
222
+ [“] 00E2 0080 009C [“] 201C
223
+ [”] 00E2 0080 009D [”] 201D
224
+ [„] 00E2 0080 009E [„] 201E
225
+ [†] 00E2 0080 00A0 [†] 2020
226
+ [‡] 00E2 0080 00A1 [‡] 2021
227
+ [•] 00E2 0080 00A2 [•] 2022
228
+ […] 00E2 0080 00A6 […] 2026
229
+ [‰] 00E2 0080 00B0 [‰] 2030
230
+ [‹] 00E2 0080 00B9 [‹] 2039
231
+ [›] 00E2 0080 00BA [›] 203A
232
+ [⁠] 00E2 0081 00A0 [⁠] 2060
233
+ [€] 00E2 0082 00AC [€] 20AC
234
+ [™] 00E2 0084 00A2 [™] 2122
235
+ [€] 00E2 201A 00AC [€] 20AC
236
+ [â„¢] 00E2 201E 00A2 [™] 2122
237
+ [ ] 00E2 20AC 0081 [ ] 2001
238
+ [”] 00E2 20AC 009D [”] 201D
239
+ [†] 00E2 20AC 00A0 [†] 2020
240
+ [‡] 00E2 20AC 00A1 [‡] 2021
241
+ [•] 00E2 20AC 00A2 [•] 2022
242
+ […] 00E2 20AC 00A6 […] 2026
243
+ [‰] 00E2 20AC 00B0 [‰] 2030
244
+ [‹] 00E2 20AC 00B9 [‹] 2039
245
+ [›] 00E2 20AC 00BA [›] 203A
246
+ [“] 00E2 20AC 0153 [“] 201C
247
+ [ ] 00E2 20AC 0160 [ ] 200A
248
+ [‚] 00E2 20AC 0161 [‚] 201A
249
+ [„] 00E2 20AC 017E [„] 201E
250
+ [ ] 00E2 20AC 0192 [ ] 2003
251
+ [ ] 00E2 20AC 02C6 [ ] 2008
252
+ [‘] 00E2 20AC 02DC [‘] 2018
253
+ [ ] 00E2 20AC 201A [ ] 2002
254
+ [–] 00E2 20AC 201C [–] 2013
255
+ [—] 00E2 20AC 201D [—] 2014
256
+ [ ] 00E2 20AC 201E [ ] 2004
257
+ [ ] 00E2 20AC 2020 [ ] 2006
258
+ [ ] 00E2 20AC 2021 [ ] 2007
259
+ [ ] 00E2 20AC 2026 [ ] 2005
260
+ [ ] 00E2 20AC 2030 [ ] 2009
261
+ [​] 00E2 20AC 2039 [​] 200B
262
+ [ ] 00E2 20AC 20AC [ ] 2000
263
+ [’] 00E2 20AC 2122 [’] 2019
264
+ [â€�] 00E2 20AC FFFD [”] 201D
265
+ [â� ] 00E2 FFFD 00A0 [⁠] 2060
266
+ [] 00EF 00BB 00BF [] FEFF
267
+ [�] 00EF 00BF 00BD [�] FFFD
268
+ [￾] 00EF 00BF 00BE [￾] FFFE
@@ -16,7 +16,7 @@
16
16
 
17
17
  module Iudex
18
18
  module Core
19
- VERSION = '1.0.0'
19
+ VERSION = '1.1.0'
20
20
 
21
21
  LIB_DIR = File.dirname( __FILE__ ) # :nodoc:
22
22
  end
Binary file
@@ -0,0 +1,73 @@
1
+ #--
2
+ # Copyright (c) 2011 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'iudex-core'
18
+ require 'java'
19
+
20
+ module Iudex::Core
21
+
22
# Loader for mojibake recovery tables: a detection regex plus a mapping
# from mis-decoded character sequences to the characters they replace.
module MojiBake
  # Table used when no file is given: config/mojibake, two directories
  # above this file.
  DEFAULT_CONFIG = File.join( File.dirname( __FILE__ ),
                              '..', '..', 'config', 'mojibake' )

  # Parse a mojibake table file.
  #
  # Two kinds of line are recognized; every other line is ignored:
  # * "/PATTERN/"  - the detection regex source (PATTERN may not contain
  #   a '/'). If several such lines exist the last one wins.
  # * "[moji] HEX HEX... [orig] HEX" - one table row: the hex code
  #   points of the mojibake sequence, then the hex code point of the
  #   original character.
  #
  # file:: path of the table to read (defaults to DEFAULT_CONFIG)
  #
  # Returns a two element Array: the regex source String (nil when the
  # file has no regex line) and a java.util.HashMap of
  # java.lang.String => java.lang.String built from the table rows.
  def self.load_config( file = DEFAULT_CONFIG )
    regex = nil
    mojis = []
    File.open( file ) do |fin|
      fin.each do |line|
        case line
        when %r{^/([^/]+)/$}
          regex = $1
        when /^\[.*?\]\s+([0-9A-F ]+)\s+\[.*\]\s+([0-9A-F]+)$/
          # Wrap the single replacement code point in an Array so that
          # jstring always receives an Array. Passing the bare String
          # relied on Ruby 1.8's String#map (String as Enumerable) and
          # raises NoMethodError on Ruby 1.9 and later.
          mojis << [ $1.split( ' ' ), [ $2 ] ]
        end
      end
    end

    mh = Java::java.util.HashMap.new( 512 )
    mojis.each do | moji, rpl |
      mh.put( jstring( moji ), jstring( rpl ) )
    end
    [ regex, mh ]
  end

  # Internal helper: build a java.lang.String from an Array of hex code
  # point strings (e.g. [ "00C2", "0080" ]), one Java char per entry.
  #
  # Note: the bare `private` that used to precede this method had no
  # effect on a `def self.` method, so this helper has always been
  # publicly callable; it is left public to keep existing callers working.
  def self.jstring( cps )
    cs = cps.map { |cp| cp.hex }.to_java( :char )
    Java::java.lang.String.new( cs )
  end

end
55
+
56
module Filters
  import 'iudex.core.filters.MojiBakeFilter'

  # The imported Java class iudex.core.filters.MojiBakeFilter is
  # re-opened here so that it can be constructed from a table file.
  class MojiBakeFilter

    # Alternative constructor: reads config_file (in `mojibake -t`
    # format, defaulting to MojiBake::DEFAULT_CONFIG) and hands the
    # resulting regex and replacement table to the original
    # constructor, after key.
    def initialize( key, config_file = MojiBake::DEFAULT_CONFIG )
      regex, table = MojiBake.load_config( config_file )
      super( key, regex, table )
    end

  end

end
72
+
73
+ end
data/lib/iudex-core.rb CHANGED
@@ -1,3 +1,4 @@
1
+
1
2
  #--
2
3
  # Copyright (c) 2008-2011 David Kellum
3
4
  #
@@ -31,7 +32,9 @@ module Iudex
31
32
 
32
33
  import 'iudex.core.ContentKeys'
33
34
  import 'iudex.core.ContentSource'
34
- import 'iudex.core.VisitExecutor'
35
+ import 'iudex.core.VisitManager'
36
+ import 'iudex.core.VisitQueueFactory'
37
+ import 'iudex.core.VisitQueue'
35
38
  import 'iudex.core.VisitURL'
36
39
 
37
40
  module Filters
@@ -40,10 +43,13 @@ module Iudex
40
43
  import 'iudex.core.filters.DateChangeFilter'
41
44
  import 'iudex.core.filters.DefaultFilter'
42
45
  import 'iudex.core.filters.FutureDateFilter'
43
- import 'iudex.core.filters.RLDomainFilter'
46
+ import 'iudex.core.filters.RedirectHandler'
47
+ import 'iudex.core.filters.Revisitor'
44
48
  import 'iudex.core.filters.TextCtrlWSFilter'
45
49
  import 'iudex.core.filters.UHashMDCSetter'
46
50
  end
47
51
 
48
52
  end
49
53
  end
54
+
55
+ require 'iudex-core/mojibake'
data/pom.xml CHANGED
@@ -5,13 +5,13 @@
5
5
  <groupId>iudex</groupId>
6
6
  <artifactId>iudex-core</artifactId>
7
7
  <packaging>jar</packaging>
8
- <version>1.0.0</version>
8
+ <version>1.1.0</version>
9
9
  <name>Iudex Core System</name>
10
10
 
11
11
  <parent>
12
12
  <groupId>iudex</groupId>
13
13
  <artifactId>iudex-parent</artifactId>
14
- <version>1.0</version>
14
+ <version>1.1</version>
15
15
  <relativePath>..</relativePath>
16
16
  </parent>
17
17
 
@@ -30,19 +30,19 @@
30
30
  <dependency>
31
31
  <groupId>iudex</groupId>
32
32
  <artifactId>iudex-filter</artifactId>
33
- <version>[1.0,1.1)</version>
33
+ <version>[1.1,1.2)</version>
34
34
  </dependency>
35
35
 
36
36
  <dependency>
37
37
  <groupId>iudex</groupId>
38
38
  <artifactId>iudex-http</artifactId>
39
- <version>[1.0,1.1)</version>
39
+ <version>[1.1,1.2)</version>
40
40
  </dependency>
41
41
 
42
42
  <dependency>
43
43
  <groupId>iudex</groupId>
44
44
  <artifactId>iudex-barc</artifactId>
45
- <version>[1.0,1.1)</version>
45
+ <version>[1.1,1.2)</version>
46
46
  </dependency>
47
47
 
48
48
  <dependency>
@@ -37,10 +37,16 @@ module TestHTTPMocks
37
37
  WEAK_ETAG = 'W/"weak-etag"'
38
38
 
39
39
  class MockSession < Iudex::HTTP::HTTPSession
40
- import 'com.gravitext.util.ByteBufferInputStream'
41
40
  import 'java.nio.ByteBuffer'
42
41
  include Iudex::HTTP
43
42
 
43
+ attr_writer :status
44
+
45
+ def initialize
46
+ super()
47
+ @status = 200
48
+ end
49
+
44
50
  def requestHeaders
45
51
  [ ]
46
52
  end
@@ -49,12 +55,12 @@ module TestHTTPMocks
49
55
  [ Header.new( "ETag", WEAK_ETAG ) ]
50
56
  end
51
57
 
52
- def responseCode
53
- 200
58
+ def statusCode
59
+ @status
54
60
  end
55
61
 
56
- def responseStream
57
- ByteBufferInputStream.new( ByteBuffer::wrap( "".to_java_bytes ) )
62
+ def responseBody
63
+ ByteBuffer::wrap( "BODY".to_java_bytes )
58
64
  end
59
65
 
60
66
  def statusText
@@ -62,7 +68,10 @@ module TestHTTPMocks
62
68
  end
63
69
 
64
70
  def execute( handler )
65
- handler.handle_success( self )
71
+ handler.session_completed( self )
72
+ end
73
+
74
+ def close
66
75
  end
67
76
  end
68
77
 
@@ -88,6 +97,20 @@ module TestHTTPMocks
88
97
  end
89
98
  end
90
99
 
100
+ import 'iudex.core.VisitCounter'
101
+
102
# Test double for the VisitCounter interface: ignores added orders and
# remembers the URL of the last released one.
class TestVisitCounter
  include VisitCounter

  # Value of acquired.url from the most recent #release call
  # (nil until #release has been called).
  attr_reader :released

  # Intentionally does nothing.
  def add( _order ); end

  # Records the URL of the released order; the second argument is unused.
  def release( acquired, _new_order )
    @released = acquired.url
  end
end
113
+
91
114
  end
92
115
 
93
116
  class TestContentFetcher < MiniTest::Unit::TestCase
@@ -119,54 +142,27 @@ class TestContentFetcher < MiniTest::Unit::TestCase
119
142
  def test_304
120
143
  client = MockHTTPClient.new
121
144
  def client.request( session, handler )
122
- handler.handle_error( session, 304 )
145
+ session.status = 304
146
+ handler.session_completed( session )
123
147
  end
124
148
  fetch( create_content, client ) do |out|
125
149
  assert_equal( DEFAULT_URL, out.url.to_s )
126
150
  assert_equal( 304, out.status )
127
- assert_nil( out.etag )
128
- assert_nil( out.source )
129
- end
130
- end
131
-
132
- REDIRECT_URL = "http://gravitext.com/redirect#foo"
133
- REDIRECT_NORM = "http://gravitext.com/redirect"
134
-
135
- def test_redirect
136
- client = MockHTTPClient.new
137
- def client.create_session
138
- s = MockSession.new
139
- def s.execute( handler )
140
- self.url = REDIRECT_URL
141
- super
142
- end
143
- s
144
- end
145
- fetch( create_content, client ) do |out|
146
- assert_equal( REDIRECT_NORM, out.url.to_s )
147
- assert_equal( 200, out.status )
148
-
149
- ref = out.referer
150
-
151
- assert_equal( DEFAULT_URL, ref.url.to_s )
152
- assert_equal( 302, ref.status )
153
- assert_equal( REDIRECT_NORM, ref.referent.url.to_s )
154
151
  end
155
152
  end
156
153
 
157
154
  import "java.net.UnknownHostException"
158
- import "java.io.IOException"
159
155
 
160
156
  def test_connect_error
161
157
  client = MockHTTPClient.new
162
158
  def client.create_session
163
159
  s = MockSession.new
164
160
  def s.execute( handler )
165
- handler.handle_exception( self,
166
- UnknownHostException.new( "foobar.com" ) )
161
+ self.error = UnknownHostException.new( "foobar.com" )
162
+ handler.session_completed( self )
167
163
  end
168
- def s.responseCode
169
- nil
164
+ def s.statusCode
165
+ -1
170
166
  end
171
167
  def s.responseHeaders
172
168
  nil
@@ -182,7 +178,9 @@ class TestContentFetcher < MiniTest::Unit::TestCase
182
178
 
183
179
  def fetch( content, client = MockHTTPClient.new, &block )
184
180
  rec = TestReceiver.new( &block )
181
+ counter = TestVisitCounter.new
185
182
  cf = ContentFetcher.new( client,
183
+ counter,
186
184
  FilterChain.new( "test-rec", [ rec ] ) )
187
185
  cf.filter( content )
188
186
  end
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env jruby
2
+ #.hashdot.profile += jruby-shortlived
3
+
4
+ #--
5
+ # Copyright (c) 2011 David Kellum
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
8
+ # may not use this file except in compliance with the License. You
9
+ # may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
16
+ # implied. See the License for the specific language governing
17
+ # permissions and limitations under the License.
18
+ #++
19
+
20
+ require File.join( File.dirname( __FILE__ ), "setup" )
21
+ require 'iudex-core'
22
+
23
# Tests for the default-encoding bookkeeping of ContentSource.
class TestContentSource < MiniTest::Unit::TestCase
  include Iudex::Core

  import 'java.nio.ByteBuffer'
  import 'java.nio.charset.Charset'

  # Look up a java.nio.charset.Charset by name.
  def self.charset( name )
    Charset::lookup( name )
  end

  UTF8 = charset( "UTF-8" )
  ISO1 = charset( "ISO-8859-1" )

  # Each test starts from a fresh ContentSource wrapping the bytes "any".
  def setup
    @cs = ContentSource.new( ByteBuffer::wrap( "any".to_java_bytes ) )
  end

  # A fresh source reports no default encoding.
  #
  # This test used to be named test_default_encoding as well; the
  # second definition of that name replaced it, so it never ran. It
  # now has its own name so both tests execute.
  def test_default_encoding_unset
    refute( @cs.default_encoding )
  end

  # Setting an encoding with zero confidence is still accepted.
  def test_default_encoding
    assert( @cs.set_default_encoding( UTF8, 0.0 ) )
    assert_equal( UTF8, @cs.default_encoding )
    assert_in_epsilon( 0.0, @cs.encoding_confidence )
  end

  # Setting the same encoding twice at 0.10 yields a confidence of 0.20.
  def test_default_encoding_additive
    2.times { assert( @cs.set_default_encoding( UTF8, 0.10 ) ) }
    assert_equal( UTF8, @cs.default_encoding )
    assert_in_epsilon( 0.20, @cs.encoding_confidence )
  end

  # Map based updates: the return value is asserted true when the
  # default encoding is set or replaced and false when it is not.
  def test_default_encoding_map
    assert( @cs.set_default_encoding( { UTF8 => f( 0.10 ),
                                        ISO1 => f( 0.20 ) } ) )
    assert_equal( ISO1, @cs.default_encoding )
    assert_in_epsilon( 0.20, @cs.encoding_confidence )

    refute( @cs.set_default_encoding( {} ) )
    refute( @cs.set_default_encoding( { UTF8 => f( 0.05 ) } ) )
    assert( @cs.set_default_encoding( { UTF8 => f( 0.07 ),
                                        ISO1 => f( 0.01 ) } ) )

    assert_equal( UTF8, @cs.default_encoding )
    # Presumably per-charset accumulation: 0.10 + 0.05 + 0.07 = 0.22.
    assert_in_epsilon( 0.22, @cs.encoding_confidence )
  end

  # Box a Ruby number as a java.lang.Float for use as a map value.
  def f( v )
    Java::java.lang.Float.new( v )
  end

end
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env jruby
2
+ # -*- coding: utf-8 -*-
3
+ #.hashdot.profile += jruby-shortlived
4
+
5
+ #--
6
+ # Copyright (c) 2011 David Kellum
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
9
+ # may not use this file except in compliance with the License. You
10
+ # may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
17
+ # implied. See the License for the specific language governing
18
+ # permissions and limitations under the License.
19
+ #++
20
+
21
+ require File.join( File.dirname( __FILE__ ), "setup" )
22
+ require 'iudex-core/mojibake'
23
+
24
+ class TestMojiBake < MiniTest::Unit::TestCase
25
+ include Gravitext::HTMap
26
+ include Iudex::Core
27
+ include Iudex::Core::Filters
28
+
29
+ UniMap.define_accessors
30
+
31
+ FILTER = MojiBakeFilter.new( ContentKeys::SUMMARY )
32
+
33
+ def test_nomatch_recover
34
+ assert_filter( '', '' )
35
+ assert_filter( 'ascii', 'ascii' )
36
+ assert_filter( 'Â', 'Â' )
37
+ end
38
+
39
+ def test_simple_recover
40
+ assert_filter( '[°]', '[°]' )
41
+ assert_filter( '“quoted”', '“quotedâ€�' )
42
+ assert_filter( '“quoted”', '“quoted”' )
43
+ end
44
+
45
+ def test_recursive_recover
46
+ assert_filter( '°', '°' )
47
+ assert_filter( 'AP – Greenlake', 'AP – Greenlake' )
48
+ assert_filter( 'you’re', 'you’re' )
49
+ end
50
+
51
# Run FILTER over a fresh UniMap whose summary is input, then assert
# that the filter returned a true value and that the resulting summary
# (as a String) equals output.
def assert_filter( output, input )
  content = UniMap.new
  content.summary = input
  accepted = FILTER.filter( content )
  assert( accepted )
  recovered = content.summary.to_s
  assert_equal( output, recovered, "From: #{input}" )
end
57
+
58
+ end