charguess 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (135) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +134 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +26 -0
  5. data/Rakefile +25 -0
  6. data/ext/charguess/charguess.c +29 -0
  7. data/ext/charguess/extconf.rb +11 -0
  8. data/ext/libcharguess/cpp/.deps/EUCJPProber.Plo +1 -0
  9. data/ext/libcharguess/cpp/.deps/EUCJPProber.Po +87 -0
  10. data/ext/libcharguess/cpp/.deps/EUCKRProber.Plo +1 -0
  11. data/ext/libcharguess/cpp/.deps/EUCKRProber.Po +85 -0
  12. data/ext/libcharguess/cpp/.deps/EUCTWProber.Plo +1 -0
  13. data/ext/libcharguess/cpp/.deps/EUCTWProber.Po +85 -0
  14. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Plo +1 -0
  15. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Po +83 -0
  16. data/ext/libcharguess/cpp/.deps/EscSM.Plo +1 -0
  17. data/ext/libcharguess/cpp/.deps/EscSM.Po +77 -0
  18. data/ext/libcharguess/cpp/.deps/GB2312Prober.Plo +1 -0
  19. data/ext/libcharguess/cpp/.deps/GB2312Prober.Po +85 -0
  20. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo +1 -0
  21. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Po +78 -0
  22. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo +1 -0
  23. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Po +78 -0
  24. data/ext/libcharguess/cpp/.deps/LangGreekModel.Plo +1 -0
  25. data/ext/libcharguess/cpp/.deps/LangGreekModel.Po +78 -0
  26. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Plo +1 -0
  27. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Po +78 -0
  28. data/ext/libcharguess/cpp/.deps/LangThaiModel.Plo +1 -0
  29. data/ext/libcharguess/cpp/.deps/LangThaiModel.Po +78 -0
  30. data/ext/libcharguess/cpp/.deps/Latin1Prober.Plo +1 -0
  31. data/ext/libcharguess/cpp/.deps/Latin1Prober.Po +78 -0
  32. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo +1 -0
  33. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Po +102 -0
  34. data/ext/libcharguess/cpp/.deps/MBCSSM.Plo +1 -0
  35. data/ext/libcharguess/cpp/.deps/MBCSSM.Po +77 -0
  36. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo +1 -0
  37. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Po +80 -0
  38. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Plo +1 -0
  39. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Po +78 -0
  40. data/ext/libcharguess/cpp/.deps/SJISProber.Plo +1 -0
  41. data/ext/libcharguess/cpp/.deps/SJISProber.Po +86 -0
  42. data/ext/libcharguess/cpp/.deps/UTF8Prober.Plo +1 -0
  43. data/ext/libcharguess/cpp/.deps/UTF8Prober.Po +82 -0
  44. data/ext/libcharguess/cpp/.deps/big5Prober.Plo +1 -0
  45. data/ext/libcharguess/cpp/.deps/big5Prober.Po +84 -0
  46. data/ext/libcharguess/cpp/.deps/charDistribution.Plo +1 -0
  47. data/ext/libcharguess/cpp/.deps/charDistribution.Po +87 -0
  48. data/ext/libcharguess/cpp/.deps/chardet.Plo +1 -0
  49. data/ext/libcharguess/cpp/.deps/chardet.Po +84 -0
  50. data/ext/libcharguess/cpp/.deps/charguess.Po +77 -0
  51. data/ext/libcharguess/cpp/.deps/jpCntx.Plo +1 -0
  52. data/ext/libcharguess/cpp/.deps/jpCntx.Po +75 -0
  53. data/ext/libcharguess/cpp/.deps/universal.Plo +1 -0
  54. data/ext/libcharguess/cpp/.deps/universal.Po +111 -0
  55. data/ext/libcharguess/cpp/AUTHORS +3 -0
  56. data/ext/libcharguess/cpp/Big5Freq.tab +928 -0
  57. data/ext/libcharguess/cpp/COPYING +340 -0
  58. data/ext/libcharguess/cpp/COPYRIGHT +20 -0
  59. data/ext/libcharguess/cpp/ChangeLog +0 -0
  60. data/ext/libcharguess/cpp/EUCJPProber.cpp +80 -0
  61. data/ext/libcharguess/cpp/EUCJPProber.h +58 -0
  62. data/ext/libcharguess/cpp/EUCKRFreq.tab +615 -0
  63. data/ext/libcharguess/cpp/EUCKRProber.cpp +80 -0
  64. data/ext/libcharguess/cpp/EUCKRProber.h +54 -0
  65. data/ext/libcharguess/cpp/EUCTWFreq.tab +448 -0
  66. data/ext/libcharguess/cpp/EUCTWProber.cpp +79 -0
  67. data/ext/libcharguess/cpp/EUCTWProber.h +53 -0
  68. data/ext/libcharguess/cpp/EscCharsetProber.cpp +89 -0
  69. data/ext/libcharguess/cpp/EscCharsetProber.h +49 -0
  70. data/ext/libcharguess/cpp/EscSM.cpp +244 -0
  71. data/ext/libcharguess/cpp/GB2312Freq.tab +476 -0
  72. data/ext/libcharguess/cpp/GB2312Prober.cpp +84 -0
  73. data/ext/libcharguess/cpp/GB2312Prober.h +56 -0
  74. data/ext/libcharguess/cpp/INSTALL +229 -0
  75. data/ext/libcharguess/cpp/JISFreq.tab +574 -0
  76. data/ext/libcharguess/cpp/LICENSE +504 -0
  77. data/ext/libcharguess/cpp/LangBulgarianModel.cpp +230 -0
  78. data/ext/libcharguess/cpp/LangCyrillicModel.cpp +340 -0
  79. data/ext/libcharguess/cpp/LangGreekModel.cpp +229 -0
  80. data/ext/libcharguess/cpp/LangHungarianModel.cpp +228 -0
  81. data/ext/libcharguess/cpp/LangThaiModel.cpp +206 -0
  82. data/ext/libcharguess/cpp/Latin1Prober.cpp +190 -0
  83. data/ext/libcharguess/cpp/Latin1Prober.h +49 -0
  84. data/ext/libcharguess/cpp/MBCSGroupProber.cpp +186 -0
  85. data/ext/libcharguess/cpp/MBCSGroupProber.h +58 -0
  86. data/ext/libcharguess/cpp/MBCSSM.cpp +610 -0
  87. data/ext/libcharguess/cpp/Makefile.am +45 -0
  88. data/ext/libcharguess/cpp/Makefile.in +608 -0
  89. data/ext/libcharguess/cpp/NEWS +0 -0
  90. data/ext/libcharguess/cpp/README +0 -0
  91. data/ext/libcharguess/cpp/SBCSGroupProber.cpp +244 -0
  92. data/ext/libcharguess/cpp/SBCSGroupProber.h +54 -0
  93. data/ext/libcharguess/cpp/SBCharsetProber.cpp +100 -0
  94. data/ext/libcharguess/cpp/SBCharsetProber.h +89 -0
  95. data/ext/libcharguess/cpp/SJISProber.cpp +86 -0
  96. data/ext/libcharguess/cpp/SJISProber.h +60 -0
  97. data/ext/libcharguess/cpp/UTF8Prober.cpp +75 -0
  98. data/ext/libcharguess/cpp/UTF8Prober.h +46 -0
  99. data/ext/libcharguess/cpp/aclocal.m4 +1008 -0
  100. data/ext/libcharguess/cpp/autogen.sh +153 -0
  101. data/ext/libcharguess/cpp/big5Prober.cpp +76 -0
  102. data/ext/libcharguess/cpp/big5Prober.h +53 -0
  103. data/ext/libcharguess/cpp/charDistribution.cpp +90 -0
  104. data/ext/libcharguess/cpp/charDistribution.h +219 -0
  105. data/ext/libcharguess/cpp/charguess.cpp +56 -0
  106. data/ext/libcharguess/cpp/charguess.h +23 -0
  107. data/ext/libcharguess/cpp/charsetProber.h +50 -0
  108. data/ext/libcharguess/cpp/codingStateMachine.h +92 -0
  109. data/ext/libcharguess/cpp/config.h +36 -0
  110. data/ext/libcharguess/cpp/config.h.in +35 -0
  111. data/ext/libcharguess/cpp/config.status +1075 -0
  112. data/ext/libcharguess/cpp/configure +5226 -0
  113. data/ext/libcharguess/cpp/configure.in +49 -0
  114. data/ext/libcharguess/cpp/depcomp +472 -0
  115. data/ext/libcharguess/cpp/fix_copyright +32 -0
  116. data/ext/libcharguess/cpp/install-sh +294 -0
  117. data/ext/libcharguess/cpp/jpCntx.cpp +194 -0
  118. data/ext/libcharguess/cpp/jpCntx.h +100 -0
  119. data/ext/libcharguess/cpp/missing +336 -0
  120. data/ext/libcharguess/cpp/mkinstalldirs +111 -0
  121. data/ext/libcharguess/cpp/pkgInt.h +72 -0
  122. data/ext/libcharguess/cpp/stamp-h1 +1 -0
  123. data/ext/libcharguess/cpp/test/test.cpp +78 -0
  124. data/ext/libcharguess/cpp/types.h +41 -0
  125. data/ext/libcharguess/cpp/universal.cpp +273 -0
  126. data/ext/libcharguess/cpp/universal.h +65 -0
  127. data/script/console +9 -0
  128. data/script/destroy +14 -0
  129. data/script/generate +14 -0
  130. data/tasks/extconf/charguess.rake +47 -0
  131. data/tasks/extconf.rake +13 -0
  132. data/test/test_charguess.rb +7 -0
  133. data/test/test_charguess_extn.rb +10 -0
  134. data/test/test_helper.rb +3 -0
  135. metadata +219 -0
@@ -0,0 +1,273 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "universal.h"
22
+
23
+ #include "MBCSGroupProber.h"
24
+ #include "SBCSGroupProber.h"
25
+ #include "EscCharsetProber.h"
26
+ #include "Latin1Prober.h"
27
+
28
+ nsUniversalDetector::nsUniversalDetector()
29
+ {
30
+ mDone = PR_FALSE;
31
+ mBestGuess = -1; //illegal value as signal
32
+ mInTag = PR_FALSE;
33
+ mEscCharSetProber = nsnull;
34
+
35
+ mStart = PR_TRUE;
36
+ mDetectedCharset = nsnull;
37
+ mGotData = PR_FALSE;
38
+ mInputState = ePureAscii;
39
+ mLastChar = '\0';
40
+
41
+ PRUint32 i;
42
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
43
+ mCharSetProbers[i] = nsnull;
44
+ }
45
+
46
+ nsUniversalDetector::~nsUniversalDetector()
47
+ {
48
+ for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
49
+ if (mCharSetProbers[i])
50
+ delete mCharSetProbers[i];
51
+ if (mEscCharSetProber)
52
+ delete mEscCharSetProber;
53
+ }
54
+
55
+ void nsUniversalDetector::Reset()
56
+ {
57
+ mDone = PR_FALSE;
58
+ mBestGuess = -1; //illegal value as signal
59
+ mInTag = PR_FALSE;
60
+
61
+ mStart = PR_TRUE;
62
+ mDetectedCharset = nsnull;
63
+ mGotData = PR_FALSE;
64
+ mInputState = ePureAscii;
65
+ mLastChar = '\0';
66
+
67
+ if (mEscCharSetProber)
68
+ mEscCharSetProber->Reset();
69
+
70
+ PRUint32 i;
71
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
72
+ if (mCharSetProbers[i])
73
+ mCharSetProbers[i]->Reset();
74
+ }
75
+
76
+ //---------------------------------------------------------------------
77
+ #define SHORTCUT_THRESHOLD (float)0.95
78
+ #define MINIMUM_THRESHOLD (float)0.20
79
+
80
+ void nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
81
+ {
82
+ if(mDone)
83
+ return;
84
+
85
+ if (aLen > 0)
86
+ mGotData = PR_TRUE;
87
+
88
+ //If the data starts with BOM, we know it is UTF
89
+ if (mStart)
90
+ {
91
+ mStart = PR_FALSE;
92
+ if (aLen > 3)
93
+ switch (aBuf[0])
94
+ {
95
+ case '\xEF':
96
+ if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
97
+ // EF BB BF UTF-8 encoded BOM
98
+ mDetectedCharset = "UTF-8";
99
+ break;
100
+ case '\xFE':
101
+ if (('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
102
+ // FE FF 00 00 UCS-4, unusual octet order BOM (3412)
103
+ mDetectedCharset = "X-ISO-10646-UCS-4-3412";
104
+ else if ('\xFF' == aBuf[1])
105
+ // FE FF UTF-16, big endian BOM
106
+ mDetectedCharset = "UTF-16BE";
107
+ break;
108
+ case '\x00':
109
+ if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
110
+ // 00 00 FE FF UTF-32, big-endian BOM
111
+ mDetectedCharset = "UTF-32BE";
112
+ else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
113
+ // 00 00 FF FE UCS-4, unusual octet order BOM (2143)
114
+ mDetectedCharset = "X-ISO-10646-UCS-4-2143";
115
+ break;
116
+ case '\xFF':
117
+ if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
118
+ // FF FE 00 00 UTF-32, little-endian BOM
119
+ mDetectedCharset = "UTF-32LE";
120
+ else if ('\xFE' == aBuf[1])
121
+ // FF FE UTF-16, little endian BOM
122
+ mDetectedCharset = "UTF-16LE";
123
+ break;
124
+ } // switch
125
+
126
+ if (mDetectedCharset)
127
+ {
128
+ mDone = PR_TRUE;
129
+ return;
130
+ }
131
+ }
132
+
133
+ PRUint32 i;
134
+ for (i = 0; i < aLen; i++)
135
+ {
136
+ //other than 0xa0, if every other character is ascii, the page is ascii
137
+ if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
138
+ {
139
+ //we got a non-ascii byte (high-byte)
140
+ if (mInputState != eHighbyte)
141
+ {
142
+ //adjust state
143
+ mInputState = eHighbyte;
144
+
145
+ //kill mEscCharSetProber if it is active
146
+ if (mEscCharSetProber) {
147
+ delete mEscCharSetProber;
148
+ mEscCharSetProber = nsnull;
149
+ }
150
+
151
+ //start multibyte and singlebyte charset prober
152
+ if (nsnull == mCharSetProbers[0])
153
+ mCharSetProbers[0] = new nsMBCSGroupProber;
154
+ if (nsnull == mCharSetProbers[1])
155
+ mCharSetProbers[1] = new nsSBCSGroupProber;
156
+ if (nsnull == mCharSetProbers[2])
157
+ mCharSetProbers[2] = new nsLatin1Prober;
158
+ }
159
+ }
160
+ else
161
+ {
162
+ //ok, just pure ascii so far
163
+ if ( ePureAscii == mInputState &&
164
+ (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
165
+ {
166
+ //found escape character or HZ "~{"
167
+ mInputState = eEscAscii;
168
+ }
169
+ mLastChar = aBuf[i];
170
+ }
171
+ }
172
+
173
+ nsProbingState st;
174
+ switch (mInputState)
175
+ {
176
+ case eEscAscii:
177
+ if (nsnull == mEscCharSetProber)
178
+ mEscCharSetProber = new nsEscCharSetProber;
179
+ st = mEscCharSetProber->HandleData(aBuf, aLen);
180
+ if (st == eFoundIt)
181
+ {
182
+ mDone = PR_TRUE;
183
+ mDetectedCharset = mEscCharSetProber->GetCharSetName();
184
+ }
185
+ break;
186
+ case eHighbyte:
187
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
188
+ {
189
+ st = mCharSetProbers[i]->HandleData(aBuf, aLen);
190
+ if (st == eFoundIt)
191
+ {
192
+ mDone = PR_TRUE;
193
+ mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
194
+ return;
195
+ }
196
+ }
197
+ break;
198
+
199
+ default: //pure ascii
200
+ ;//do nothing here
201
+ }
202
+ return ;
203
+ }
204
+
205
+
206
+ //---------------------------------------------------------------------
207
+ void nsUniversalDetector::DataEnd()
208
+ {
209
+ if (!mGotData)
210
+ {
211
+ // we haven't got any data yet, return immediately
212
+ // the caller sometimes calls DataEnd before anything has been sent to the detector
213
+ return;
214
+ }
215
+
216
+ if (mDetectedCharset)
217
+ {
218
+ mDone = PR_TRUE;
219
+ Report(mDetectedCharset);
220
+ return;
221
+ }
222
+
223
+ switch (mInputState)
224
+ {
225
+ case eHighbyte:
226
+ {
227
+ float proberConfidence;
228
+ float maxProberConfidence = (float)0.0;
229
+ PRInt32 maxProber = 0;
230
+
231
+ for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
232
+ {
233
+ proberConfidence = mCharSetProbers[i]->GetConfidence();
234
+ #ifdef DEBUG_chardet
235
+ mCharSetProbers[i]->DumpStatus();
236
+ #endif
237
+
238
+ if (proberConfidence > maxProberConfidence)
239
+ {
240
+ maxProberConfidence = proberConfidence;
241
+ maxProber = i;
242
+ }
243
+ }
244
+ //report only when the best confidence exceeds the minimum threshold; otherwise report nothing, which is in fact a negative answer
245
+ if (maxProberConfidence > MINIMUM_THRESHOLD)
246
+ Report(mCharSetProbers[maxProber]->GetCharSetName());
247
+ }
248
+ break;
249
+ case eEscAscii:
250
+ break;
251
+ default:
252
+ ;
253
+ }
254
+ return;
255
+ }
256
+
257
+
258
+ void nsUniversalDetector::Report(const char* aCharset)
259
+ {
260
+ if (!mDone)
261
+ {
262
+ mDone = PR_TRUE;
263
+ mDetectedCharset = aCharset;
264
+ }
265
+ }
266
+
267
+ const char* nsUniversalDetector::GetCharset(void)
268
+ {
269
+ if (mDone == PR_TRUE)
270
+ return (mDetectedCharset);
271
+ else
272
+ return NULL;
273
+ }
@@ -0,0 +1,65 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef universal_h__
22
+ #define universal_h__
23
+
24
+ #include "types.h"
25
+
26
+ // #include "nsICharsetDetector.h"
27
+ // #include "nsIStringCharsetDetector.h"
28
+ // #include "nsICharsetDetectionObserver.h"
29
+
30
+ class nsCharSetProber;
31
+
32
+ #define NUM_OF_CHARSET_PROBERS 3
33
+
34
+ typedef enum {
35
+ ePureAscii = 0,
36
+ eEscAscii = 1,
37
+ eHighbyte = 2
38
+ } nsInputState;
39
+
40
+ class nsUniversalDetector {
41
+ public:
42
+ nsUniversalDetector();
43
+ virtual ~nsUniversalDetector();
44
+ virtual void HandleData(const char* aBuf, PRUint32 aLen);
45
+ virtual void DataEnd(void);
46
+ virtual void Reset();
47
+ virtual const char* GetCharset(void);
48
+
49
+ protected:
50
+ virtual void Report(const char* aCharset);
51
+ nsInputState mInputState;
52
+ PRBool mDone;
53
+ PRBool mInTag;
54
+ PRBool mStart;
55
+ PRBool mGotData;
56
+ char mLastChar;
57
+ const char * mDetectedCharset;
58
+ PRInt32 mBestGuess;
59
+
60
+ nsCharSetProber *mCharSetProbers[NUM_OF_CHARSET_PROBERS];
61
+ nsCharSetProber *mEscCharSetProber;
62
+ };
63
+
64
+ #endif
65
+
data/script/console ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ # File: script/console
3
+ irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'
4
+
5
+ libs = " -r irb/completion"
6
+ # Perhaps use a console_lib to store any extra methods I may want available in the console
7
+ # libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
8
+ puts "Loading charguess gem"
9
+ exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/destroy'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/generate'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,47 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build charguess."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir("ext/libcharguess/cpp") do
31
+ sh("./configure")
32
+ sh("make")
33
+ end
34
+ Dir.chdir(ext) do ruby "extconf.rb" end
35
+ end
36
+
37
+ file ext_so => ext_files do
38
+ Dir.chdir(ext) do
39
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
40
+ if !ok
41
+ require "fileutils"
42
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,13 @@
1
+ namespace :extconf do
2
+ desc "Compiles the Ruby extension"
3
+ task :compile
4
+ end
5
+
6
+ task :compile => "extconf:compile"
7
+
8
+ task :test => :compile
9
+
10
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
11
+ $hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/**/Makefile']
12
+ $hoe.spec.require_paths = Dir['{lib,ext/*}']
13
+ $hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a
@@ -0,0 +1,7 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ class TestCharguess < Test::Unit::TestCase
4
+ def test_charguess
5
+ assert_equal "UTF-8", CharGuess::guess("áéíóú")
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ require "test/unit"
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../ext/charguess"
4
+ require "charguess.so"
5
+
6
+ class TestCharguessExtn < Test::Unit::TestCase
7
+ def test_charguess
8
+ assert_equal "UTF-8", CharGuess::guess("áéíóú")
9
+ end
10
+ end
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ #require File.dirname(__FILE__) + '/../lib/charguess'
metadata ADDED
@@ -0,0 +1,219 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: charguess
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - "Ernesto Jim\xC3\xA9nez"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-09 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: newgem
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hoe
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.0
34
+ version:
35
+ description: |-
36
+ This gem builds and installs libcharguess and it's binding libcharguess-ruby
37
+
38
+ * libcharguess: http://libcharguess.sourceforge.net/
39
+ * libcharguess-ruby: http://raa.ruby-lang.org/project/charguess/
40
+ email:
41
+ - erjica@gmail.com
42
+ executables: []
43
+
44
+ extensions:
45
+ - ext/charguess/extconf.rb
46
+ extra_rdoc_files:
47
+ - History.txt
48
+ - Manifest.txt
49
+ - PostInstall.txt
50
+ - README.rdoc
51
+ files:
52
+ - History.txt
53
+ - Manifest.txt
54
+ - PostInstall.txt
55
+ - README.rdoc
56
+ - Rakefile
57
+ - ext/charguess/charguess.c
58
+ - ext/charguess/extconf.rb
59
+ - ext/libcharguess/cpp/.deps/EUCJPProber.Plo
60
+ - ext/libcharguess/cpp/.deps/EUCJPProber.Po
61
+ - ext/libcharguess/cpp/.deps/EUCKRProber.Plo
62
+ - ext/libcharguess/cpp/.deps/EUCKRProber.Po
63
+ - ext/libcharguess/cpp/.deps/EUCTWProber.Plo
64
+ - ext/libcharguess/cpp/.deps/EUCTWProber.Po
65
+ - ext/libcharguess/cpp/.deps/EscCharsetProber.Plo
66
+ - ext/libcharguess/cpp/.deps/EscCharsetProber.Po
67
+ - ext/libcharguess/cpp/.deps/EscSM.Plo
68
+ - ext/libcharguess/cpp/.deps/EscSM.Po
69
+ - ext/libcharguess/cpp/.deps/GB2312Prober.Plo
70
+ - ext/libcharguess/cpp/.deps/GB2312Prober.Po
71
+ - ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo
72
+ - ext/libcharguess/cpp/.deps/LangBulgarianModel.Po
73
+ - ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo
74
+ - ext/libcharguess/cpp/.deps/LangCyrillicModel.Po
75
+ - ext/libcharguess/cpp/.deps/LangGreekModel.Plo
76
+ - ext/libcharguess/cpp/.deps/LangGreekModel.Po
77
+ - ext/libcharguess/cpp/.deps/LangHungarianModel.Plo
78
+ - ext/libcharguess/cpp/.deps/LangHungarianModel.Po
79
+ - ext/libcharguess/cpp/.deps/LangThaiModel.Plo
80
+ - ext/libcharguess/cpp/.deps/LangThaiModel.Po
81
+ - ext/libcharguess/cpp/.deps/Latin1Prober.Plo
82
+ - ext/libcharguess/cpp/.deps/Latin1Prober.Po
83
+ - ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo
84
+ - ext/libcharguess/cpp/.deps/MBCSGroupProber.Po
85
+ - ext/libcharguess/cpp/.deps/MBCSSM.Plo
86
+ - ext/libcharguess/cpp/.deps/MBCSSM.Po
87
+ - ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo
88
+ - ext/libcharguess/cpp/.deps/SBCSGroupProber.Po
89
+ - ext/libcharguess/cpp/.deps/SBCharsetProber.Plo
90
+ - ext/libcharguess/cpp/.deps/SBCharsetProber.Po
91
+ - ext/libcharguess/cpp/.deps/SJISProber.Plo
92
+ - ext/libcharguess/cpp/.deps/SJISProber.Po
93
+ - ext/libcharguess/cpp/.deps/UTF8Prober.Plo
94
+ - ext/libcharguess/cpp/.deps/UTF8Prober.Po
95
+ - ext/libcharguess/cpp/.deps/big5Prober.Plo
96
+ - ext/libcharguess/cpp/.deps/big5Prober.Po
97
+ - ext/libcharguess/cpp/.deps/charDistribution.Plo
98
+ - ext/libcharguess/cpp/.deps/charDistribution.Po
99
+ - ext/libcharguess/cpp/.deps/chardet.Plo
100
+ - ext/libcharguess/cpp/.deps/chardet.Po
101
+ - ext/libcharguess/cpp/.deps/charguess.Po
102
+ - ext/libcharguess/cpp/.deps/jpCntx.Plo
103
+ - ext/libcharguess/cpp/.deps/jpCntx.Po
104
+ - ext/libcharguess/cpp/.deps/universal.Plo
105
+ - ext/libcharguess/cpp/.deps/universal.Po
106
+ - ext/libcharguess/cpp/AUTHORS
107
+ - ext/libcharguess/cpp/Big5Freq.tab
108
+ - ext/libcharguess/cpp/COPYING
109
+ - ext/libcharguess/cpp/COPYRIGHT
110
+ - ext/libcharguess/cpp/ChangeLog
111
+ - ext/libcharguess/cpp/EUCJPProber.cpp
112
+ - ext/libcharguess/cpp/EUCJPProber.h
113
+ - ext/libcharguess/cpp/EUCKRFreq.tab
114
+ - ext/libcharguess/cpp/EUCKRProber.cpp
115
+ - ext/libcharguess/cpp/EUCKRProber.h
116
+ - ext/libcharguess/cpp/EUCTWFreq.tab
117
+ - ext/libcharguess/cpp/EUCTWProber.cpp
118
+ - ext/libcharguess/cpp/EUCTWProber.h
119
+ - ext/libcharguess/cpp/EscCharsetProber.cpp
120
+ - ext/libcharguess/cpp/EscCharsetProber.h
121
+ - ext/libcharguess/cpp/EscSM.cpp
122
+ - ext/libcharguess/cpp/GB2312Freq.tab
123
+ - ext/libcharguess/cpp/GB2312Prober.cpp
124
+ - ext/libcharguess/cpp/GB2312Prober.h
125
+ - ext/libcharguess/cpp/INSTALL
126
+ - ext/libcharguess/cpp/JISFreq.tab
127
+ - ext/libcharguess/cpp/LICENSE
128
+ - ext/libcharguess/cpp/LangBulgarianModel.cpp
129
+ - ext/libcharguess/cpp/LangCyrillicModel.cpp
130
+ - ext/libcharguess/cpp/LangGreekModel.cpp
131
+ - ext/libcharguess/cpp/LangHungarianModel.cpp
132
+ - ext/libcharguess/cpp/LangThaiModel.cpp
133
+ - ext/libcharguess/cpp/Latin1Prober.cpp
134
+ - ext/libcharguess/cpp/Latin1Prober.h
135
+ - ext/libcharguess/cpp/MBCSGroupProber.cpp
136
+ - ext/libcharguess/cpp/MBCSGroupProber.h
137
+ - ext/libcharguess/cpp/MBCSSM.cpp
138
+ - ext/libcharguess/cpp/Makefile.am
139
+ - ext/libcharguess/cpp/Makefile.in
140
+ - ext/libcharguess/cpp/NEWS
141
+ - ext/libcharguess/cpp/README
142
+ - ext/libcharguess/cpp/SBCSGroupProber.cpp
143
+ - ext/libcharguess/cpp/SBCSGroupProber.h
144
+ - ext/libcharguess/cpp/SBCharsetProber.cpp
145
+ - ext/libcharguess/cpp/SBCharsetProber.h
146
+ - ext/libcharguess/cpp/SJISProber.cpp
147
+ - ext/libcharguess/cpp/SJISProber.h
148
+ - ext/libcharguess/cpp/UTF8Prober.cpp
149
+ - ext/libcharguess/cpp/UTF8Prober.h
150
+ - ext/libcharguess/cpp/aclocal.m4
151
+ - ext/libcharguess/cpp/autogen.sh
152
+ - ext/libcharguess/cpp/big5Prober.cpp
153
+ - ext/libcharguess/cpp/big5Prober.h
154
+ - ext/libcharguess/cpp/charDistribution.cpp
155
+ - ext/libcharguess/cpp/charDistribution.h
156
+ - ext/libcharguess/cpp/charguess.cpp
157
+ - ext/libcharguess/cpp/charguess.h
158
+ - ext/libcharguess/cpp/charsetProber.h
159
+ - ext/libcharguess/cpp/codingStateMachine.h
160
+ - ext/libcharguess/cpp/config.h
161
+ - ext/libcharguess/cpp/config.h.in
162
+ - ext/libcharguess/cpp/config.status
163
+ - ext/libcharguess/cpp/configure
164
+ - ext/libcharguess/cpp/configure.in
165
+ - ext/libcharguess/cpp/depcomp
166
+ - ext/libcharguess/cpp/fix_copyright
167
+ - ext/libcharguess/cpp/install-sh
168
+ - ext/libcharguess/cpp/jpCntx.cpp
169
+ - ext/libcharguess/cpp/jpCntx.h
170
+ - ext/libcharguess/cpp/missing
171
+ - ext/libcharguess/cpp/mkinstalldirs
172
+ - ext/libcharguess/cpp/pkgInt.h
173
+ - ext/libcharguess/cpp/stamp-h1
174
+ - ext/libcharguess/cpp/test/test.cpp
175
+ - ext/libcharguess/cpp/types.h
176
+ - ext/libcharguess/cpp/universal.cpp
177
+ - ext/libcharguess/cpp/universal.h
178
+ - script/console
179
+ - script/destroy
180
+ - script/generate
181
+ - tasks/extconf.rake
182
+ - tasks/extconf/charguess.rake
183
+ - test/test_charguess.rb
184
+ - test/test_charguess_extn.rb
185
+ - test/test_helper.rb
186
+ has_rdoc: true
187
+ homepage: http://github.com/ernesto-jimenez/charguess
188
+ licenses: []
189
+
190
+ post_install_message: PostInstall.txt
191
+ rdoc_options:
192
+ - --main
193
+ - README.rdoc
194
+ require_paths:
195
+ - ext/charguess
196
+ - ext/libcharguess
197
+ required_ruby_version: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: "0"
202
+ version:
203
+ required_rubygems_version: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - ">="
206
+ - !ruby/object:Gem::Version
207
+ version: "0"
208
+ version:
209
+ requirements: []
210
+
211
+ rubyforge_project: charguess
212
+ rubygems_version: 1.3.5
213
+ signing_key:
214
+ specification_version: 3
215
+ summary: "This gem builds and installs libcharguess and it's binding libcharguess-ruby * libcharguess: http://libcharguess.sourceforge.net/ * libcharguess-ruby: http://raa.ruby-lang.org/project/charguess/"
216
+ test_files:
217
+ - test/test_charguess.rb
218
+ - test/test_charguess_extn.rb
219
+ - test/test_helper.rb