charguess 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +134 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +26 -0
  5. data/Rakefile +25 -0
  6. data/ext/charguess/charguess.c +29 -0
  7. data/ext/charguess/extconf.rb +11 -0
  8. data/ext/libcharguess/cpp/.deps/EUCJPProber.Plo +1 -0
  9. data/ext/libcharguess/cpp/.deps/EUCJPProber.Po +87 -0
  10. data/ext/libcharguess/cpp/.deps/EUCKRProber.Plo +1 -0
  11. data/ext/libcharguess/cpp/.deps/EUCKRProber.Po +85 -0
  12. data/ext/libcharguess/cpp/.deps/EUCTWProber.Plo +1 -0
  13. data/ext/libcharguess/cpp/.deps/EUCTWProber.Po +85 -0
  14. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Plo +1 -0
  15. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Po +83 -0
  16. data/ext/libcharguess/cpp/.deps/EscSM.Plo +1 -0
  17. data/ext/libcharguess/cpp/.deps/EscSM.Po +77 -0
  18. data/ext/libcharguess/cpp/.deps/GB2312Prober.Plo +1 -0
  19. data/ext/libcharguess/cpp/.deps/GB2312Prober.Po +85 -0
  20. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo +1 -0
  21. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Po +78 -0
  22. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo +1 -0
  23. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Po +78 -0
  24. data/ext/libcharguess/cpp/.deps/LangGreekModel.Plo +1 -0
  25. data/ext/libcharguess/cpp/.deps/LangGreekModel.Po +78 -0
  26. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Plo +1 -0
  27. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Po +78 -0
  28. data/ext/libcharguess/cpp/.deps/LangThaiModel.Plo +1 -0
  29. data/ext/libcharguess/cpp/.deps/LangThaiModel.Po +78 -0
  30. data/ext/libcharguess/cpp/.deps/Latin1Prober.Plo +1 -0
  31. data/ext/libcharguess/cpp/.deps/Latin1Prober.Po +78 -0
  32. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo +1 -0
  33. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Po +102 -0
  34. data/ext/libcharguess/cpp/.deps/MBCSSM.Plo +1 -0
  35. data/ext/libcharguess/cpp/.deps/MBCSSM.Po +77 -0
  36. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo +1 -0
  37. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Po +80 -0
  38. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Plo +1 -0
  39. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Po +78 -0
  40. data/ext/libcharguess/cpp/.deps/SJISProber.Plo +1 -0
  41. data/ext/libcharguess/cpp/.deps/SJISProber.Po +86 -0
  42. data/ext/libcharguess/cpp/.deps/UTF8Prober.Plo +1 -0
  43. data/ext/libcharguess/cpp/.deps/UTF8Prober.Po +82 -0
  44. data/ext/libcharguess/cpp/.deps/big5Prober.Plo +1 -0
  45. data/ext/libcharguess/cpp/.deps/big5Prober.Po +84 -0
  46. data/ext/libcharguess/cpp/.deps/charDistribution.Plo +1 -0
  47. data/ext/libcharguess/cpp/.deps/charDistribution.Po +87 -0
  48. data/ext/libcharguess/cpp/.deps/chardet.Plo +1 -0
  49. data/ext/libcharguess/cpp/.deps/chardet.Po +84 -0
  50. data/ext/libcharguess/cpp/.deps/charguess.Po +77 -0
  51. data/ext/libcharguess/cpp/.deps/jpCntx.Plo +1 -0
  52. data/ext/libcharguess/cpp/.deps/jpCntx.Po +75 -0
  53. data/ext/libcharguess/cpp/.deps/universal.Plo +1 -0
  54. data/ext/libcharguess/cpp/.deps/universal.Po +111 -0
  55. data/ext/libcharguess/cpp/AUTHORS +3 -0
  56. data/ext/libcharguess/cpp/Big5Freq.tab +928 -0
  57. data/ext/libcharguess/cpp/COPYING +340 -0
  58. data/ext/libcharguess/cpp/COPYRIGHT +20 -0
  59. data/ext/libcharguess/cpp/ChangeLog +0 -0
  60. data/ext/libcharguess/cpp/EUCJPProber.cpp +80 -0
  61. data/ext/libcharguess/cpp/EUCJPProber.h +58 -0
  62. data/ext/libcharguess/cpp/EUCKRFreq.tab +615 -0
  63. data/ext/libcharguess/cpp/EUCKRProber.cpp +80 -0
  64. data/ext/libcharguess/cpp/EUCKRProber.h +54 -0
  65. data/ext/libcharguess/cpp/EUCTWFreq.tab +448 -0
  66. data/ext/libcharguess/cpp/EUCTWProber.cpp +79 -0
  67. data/ext/libcharguess/cpp/EUCTWProber.h +53 -0
  68. data/ext/libcharguess/cpp/EscCharsetProber.cpp +89 -0
  69. data/ext/libcharguess/cpp/EscCharsetProber.h +49 -0
  70. data/ext/libcharguess/cpp/EscSM.cpp +244 -0
  71. data/ext/libcharguess/cpp/GB2312Freq.tab +476 -0
  72. data/ext/libcharguess/cpp/GB2312Prober.cpp +84 -0
  73. data/ext/libcharguess/cpp/GB2312Prober.h +56 -0
  74. data/ext/libcharguess/cpp/INSTALL +229 -0
  75. data/ext/libcharguess/cpp/JISFreq.tab +574 -0
  76. data/ext/libcharguess/cpp/LICENSE +504 -0
  77. data/ext/libcharguess/cpp/LangBulgarianModel.cpp +230 -0
  78. data/ext/libcharguess/cpp/LangCyrillicModel.cpp +340 -0
  79. data/ext/libcharguess/cpp/LangGreekModel.cpp +229 -0
  80. data/ext/libcharguess/cpp/LangHungarianModel.cpp +228 -0
  81. data/ext/libcharguess/cpp/LangThaiModel.cpp +206 -0
  82. data/ext/libcharguess/cpp/Latin1Prober.cpp +190 -0
  83. data/ext/libcharguess/cpp/Latin1Prober.h +49 -0
  84. data/ext/libcharguess/cpp/MBCSGroupProber.cpp +186 -0
  85. data/ext/libcharguess/cpp/MBCSGroupProber.h +58 -0
  86. data/ext/libcharguess/cpp/MBCSSM.cpp +610 -0
  87. data/ext/libcharguess/cpp/Makefile.am +45 -0
  88. data/ext/libcharguess/cpp/Makefile.in +608 -0
  89. data/ext/libcharguess/cpp/NEWS +0 -0
  90. data/ext/libcharguess/cpp/README +0 -0
  91. data/ext/libcharguess/cpp/SBCSGroupProber.cpp +244 -0
  92. data/ext/libcharguess/cpp/SBCSGroupProber.h +54 -0
  93. data/ext/libcharguess/cpp/SBCharsetProber.cpp +100 -0
  94. data/ext/libcharguess/cpp/SBCharsetProber.h +89 -0
  95. data/ext/libcharguess/cpp/SJISProber.cpp +86 -0
  96. data/ext/libcharguess/cpp/SJISProber.h +60 -0
  97. data/ext/libcharguess/cpp/UTF8Prober.cpp +75 -0
  98. data/ext/libcharguess/cpp/UTF8Prober.h +46 -0
  99. data/ext/libcharguess/cpp/aclocal.m4 +1008 -0
  100. data/ext/libcharguess/cpp/autogen.sh +153 -0
  101. data/ext/libcharguess/cpp/big5Prober.cpp +76 -0
  102. data/ext/libcharguess/cpp/big5Prober.h +53 -0
  103. data/ext/libcharguess/cpp/charDistribution.cpp +90 -0
  104. data/ext/libcharguess/cpp/charDistribution.h +219 -0
  105. data/ext/libcharguess/cpp/charguess.cpp +56 -0
  106. data/ext/libcharguess/cpp/charguess.h +23 -0
  107. data/ext/libcharguess/cpp/charsetProber.h +50 -0
  108. data/ext/libcharguess/cpp/codingStateMachine.h +92 -0
  109. data/ext/libcharguess/cpp/config.h +36 -0
  110. data/ext/libcharguess/cpp/config.h.in +35 -0
  111. data/ext/libcharguess/cpp/config.status +1075 -0
  112. data/ext/libcharguess/cpp/configure +5226 -0
  113. data/ext/libcharguess/cpp/configure.in +49 -0
  114. data/ext/libcharguess/cpp/depcomp +472 -0
  115. data/ext/libcharguess/cpp/fix_copyright +32 -0
  116. data/ext/libcharguess/cpp/install-sh +294 -0
  117. data/ext/libcharguess/cpp/jpCntx.cpp +194 -0
  118. data/ext/libcharguess/cpp/jpCntx.h +100 -0
  119. data/ext/libcharguess/cpp/missing +336 -0
  120. data/ext/libcharguess/cpp/mkinstalldirs +111 -0
  121. data/ext/libcharguess/cpp/pkgInt.h +72 -0
  122. data/ext/libcharguess/cpp/stamp-h1 +1 -0
  123. data/ext/libcharguess/cpp/test/test.cpp +78 -0
  124. data/ext/libcharguess/cpp/types.h +41 -0
  125. data/ext/libcharguess/cpp/universal.cpp +273 -0
  126. data/ext/libcharguess/cpp/universal.h +65 -0
  127. data/script/console +9 -0
  128. data/script/destroy +14 -0
  129. data/script/generate +14 -0
  130. data/tasks/extconf/charguess.rake +47 -0
  131. data/tasks/extconf.rake +13 -0
  132. data/test/test_charguess.rb +7 -0
  133. data/test/test_charguess_extn.rb +10 -0
  134. data/test/test_helper.rb +3 -0
  135. metadata +219 -0
@@ -0,0 +1,273 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "universal.h"
22
+
23
+ #include "MBCSGroupProber.h"
24
+ #include "SBCSGroupProber.h"
25
+ #include "EscCharsetProber.h"
26
+ #include "Latin1Prober.h"
27
+
28
+ nsUniversalDetector::nsUniversalDetector() // Starts with no result, no probers allocated and pure-ASCII input state.
29
+ {
30
+ mDone = PR_FALSE;
31
+ mBestGuess = -1; //illegal value as signal
32
+ mInTag = PR_FALSE;
33
+ mEscCharSetProber = nsnull;
34
+ // Per-input scanning state; Reset() assigns these same values.
35
+ mStart = PR_TRUE;
36
+ mDetectedCharset = nsnull;
37
+ mGotData = PR_FALSE;
38
+ mInputState = ePureAscii;
39
+ mLastChar = '\0';
40
+ // Prober slots start empty; HandleData() allocates them when a high byte is seen.
41
+ PRUint32 i;
42
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
43
+ mCharSetProbers[i] = nsnull;
44
+ }
45
+
46
+ nsUniversalDetector::~nsUniversalDetector() // Frees every prober that was allocated.
47
+ {
48
+ for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
49
+ if (mCharSetProbers[i])
50
+ delete mCharSetProbers[i];
51
+ if (mEscCharSetProber)
52
+ delete mEscCharSetProber;
53
+ }
54
+
55
+ void nsUniversalDetector::Reset() // Returns the detector to its initial state so another input can be analysed.
56
+ {
57
+ mDone = PR_FALSE;
58
+ mBestGuess = -1; //illegal value as signal
59
+ mInTag = PR_FALSE;
60
+ // Same per-input values the constructor assigns.
61
+ mStart = PR_TRUE;
62
+ mDetectedCharset = nsnull;
63
+ mGotData = PR_FALSE;
64
+ mInputState = ePureAscii;
65
+ mLastChar = '\0';
66
+ // Probers that already exist are kept and reset in place rather than freed.
67
+ if (mEscCharSetProber)
68
+ mEscCharSetProber->Reset();
69
+
70
+ PRUint32 i;
71
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
72
+ if (mCharSetProbers[i])
73
+ mCharSetProbers[i]->Reset();
74
+ }
75
+
76
+ //---------------------------------------------------------------------
77
+ #define SHORTCUT_THRESHOLD (float)0.95
78
+ #define MINIMUM_THRESHOLD (float)0.20
79
+
80
+ void nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) // Feeds one chunk of input to the detector.
81
+ {
82
+ if(mDone)
83
+ return;
84
+ // aBuf: input bytes for this chunk; aLen: number of bytes in aBuf.
85
+ if (aLen > 0)
86
+ mGotData = PR_TRUE;
87
+ // The BOM check runs only on the first chunk after construction or Reset().
88
+ //If the data starts with BOM, we know it is UTF
89
+ if (mStart)
90
+ {
91
+ mStart = PR_FALSE;
92
+ if (aLen > 1) // a UTF-16 BOM needs only 2 bytes; longer BOMs are length-checked below
93
+ switch (aBuf[0])
94
+ {
95
+ case '\xEF':
96
+ if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
97
+ // EF BB BF UTF-8 encoded BOM
98
+ mDetectedCharset = "UTF-8";
99
+ break;
100
+ case '\xFE':
101
+ if ((aLen > 3) && ('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
102
+ // FE FF 00 00 UCS-4, unusual octet order BOM (3412)
103
+ mDetectedCharset = "X-ISO-10646-UCS-4-3412";
104
+ else if ('\xFF' == aBuf[1])
105
+ // FE FF UTF-16, big endian BOM
106
+ mDetectedCharset = "UTF-16BE";
107
+ break;
108
+ case '\x00':
109
+ if ((aLen > 3) && ('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
110
+ // 00 00 FE FF UTF-32, big-endian BOM
111
+ mDetectedCharset = "UTF-32BE";
112
+ else if ((aLen > 3) && ('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
113
+ // 00 00 FF FE UCS-4, unusual octet order BOM (2143)
114
+ mDetectedCharset = "X-ISO-10646-UCS-4-2143";
115
+ break;
116
+ case '\xFF':
117
+ if ((aLen > 3) && ('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
118
+ // FF FE 00 00 UTF-32, little-endian BOM
119
+ mDetectedCharset = "UTF-32LE";
120
+ else if ('\xFE' == aBuf[1])
121
+ // FF FE UTF-16, little endian BOM
122
+ mDetectedCharset = "UTF-16LE";
123
+ break;
124
+ } // switch
125
+
126
+ if (mDetectedCharset)
127
+ {
128
+ mDone = PR_TRUE;
129
+ return;
130
+ }
131
+ }
132
+ // Scan the chunk to classify the input: pure ASCII, escape-sequence ASCII or high-byte.
133
+ PRUint32 i;
134
+ for (i = 0; i < aLen; i++)
135
+ {
136
+ //other than 0xa0, if every other character is ascii, the page is ascii
137
+ if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
138
+ {
139
+ //we got a non-ascii byte (high-byte)
140
+ if (mInputState != eHighbyte)
141
+ {
142
+ //adjust state
143
+ mInputState = eHighbyte;
144
+
145
+ //kill mEscCharSetProber if it is active
146
+ if (mEscCharSetProber) {
147
+ delete mEscCharSetProber;
148
+ mEscCharSetProber = nsnull;
149
+ }
150
+
151
+ //start multibyte and singlebyte charset prober
152
+ if (nsnull == mCharSetProbers[0])
153
+ mCharSetProbers[0] = new nsMBCSGroupProber;
154
+ if (nsnull == mCharSetProbers[1])
155
+ mCharSetProbers[1] = new nsSBCSGroupProber;
156
+ if (nsnull == mCharSetProbers[2])
157
+ mCharSetProbers[2] = new nsLatin1Prober;
158
+ }
159
+ }
160
+ else
161
+ {
162
+ //ok, just pure ascii so far
163
+ if ( ePureAscii == mInputState &&
164
+ (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
165
+ {
166
+ //found escape character or HZ "~{"
167
+ mInputState = eEscAscii;
168
+ }
169
+ mLastChar = aBuf[i];
170
+ }
171
+ }
172
+ // Hand the whole chunk to the prober(s) that match the current input state.
173
+ nsProbingState st;
174
+ switch (mInputState)
175
+ {
176
+ case eEscAscii:
177
+ if (nsnull == mEscCharSetProber)
178
+ mEscCharSetProber = new nsEscCharSetProber;
179
+ st = mEscCharSetProber->HandleData(aBuf, aLen);
180
+ if (st == eFoundIt)
181
+ {
182
+ mDone = PR_TRUE;
183
+ mDetectedCharset = mEscCharSetProber->GetCharSetName();
184
+ }
185
+ break;
186
+ case eHighbyte:
187
+ for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
188
+ {
189
+ st = mCharSetProbers[i]->HandleData(aBuf, aLen);
190
+ if (st == eFoundIt)
191
+ {
192
+ mDone = PR_TRUE;
193
+ mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
194
+ return;
195
+ }
196
+ }
197
+ break;
198
+
199
+ default: //pure ascii
200
+ ;//do nothing here
201
+ }
202
+ return ;
203
+ }
204
+
205
+
206
+ //---------------------------------------------------------------------
207
+ void nsUniversalDetector::DataEnd() // Called at end of input to settle on a final result, if any.
208
+ {
209
+ if (!mGotData)
210
+ {
211
+ // we haven't got any data yet, return immediately
212
+ // caller program sometimes call DataEnd before anything has been sent to detector
213
+ return;
214
+ }
215
+ // A charset already chosen in HandleData() (BOM or a prober returning eFoundIt) is final.
216
+ if (mDetectedCharset)
217
+ {
218
+ mDone = PR_TRUE;
219
+ Report(mDetectedCharset);
220
+ return;
221
+ }
222
+ // Otherwise decide from the input state reached while scanning.
223
+ switch (mInputState)
224
+ {
225
+ case eHighbyte:
226
+ {
227
+ float proberConfidence;
228
+ float maxProberConfidence = (float)0.0;
229
+ PRInt32 maxProber = 0;
230
+ // Pick the prober reporting the highest confidence.
231
+ for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
232
+ {
233
+ proberConfidence = mCharSetProbers[i]->GetConfidence();
234
+ #ifdef DEBUG_chardet
235
+ mCharSetProbers[i]->DumpStatus();
236
+ #endif
237
+
238
+ if (proberConfidence > maxProberConfidence)
239
+ {
240
+ maxProberConfidence = proberConfidence;
241
+ maxProber = i;
242
+ }
243
+ }
244
+ //do not report anything because we are not confident of it, that's in fact a negative answer
245
+ if (maxProberConfidence > MINIMUM_THRESHOLD)
246
+ Report(mCharSetProbers[maxProber]->GetCharSetName());
247
+ }
248
+ break;
249
+ case eEscAscii:
250
+ break;
251
+ default:
252
+ ;
253
+ }
254
+ return;
255
+ }
256
+
257
+
258
+ void nsUniversalDetector::Report(const char* aCharset) // Stores aCharset as the result unless detection is already marked done.
259
+ {
260
+ if (!mDone)
261
+ {
262
+ mDone = PR_TRUE;
263
+ mDetectedCharset = aCharset;
264
+ }
265
+ }
266
+
267
+ const char* nsUniversalDetector::GetCharset(void) // Returns the detected charset name once detection is done, otherwise NULL.
268
+ {
269
+ if (mDone == PR_TRUE)
270
+ return (mDetectedCharset);
271
+ else
272
+ return NULL;
273
+ }
@@ -0,0 +1,65 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef universal_h__
22
+ #define universal_h__
23
+
24
+ #include "types.h"
25
+
26
+ // #include "nsICharsetDetector.h"
27
+ // #include "nsIStringCharsetDetector.h"
28
+ // #include "nsICharsetDetectionObserver.h"
29
+
30
+ class nsCharSetProber;
31
+
32
+ #define NUM_OF_CHARSET_PROBERS 3
33
+
34
+ typedef enum {
35
+ ePureAscii = 0,
36
+ eEscAscii = 1,
37
+ eHighbyte = 2
38
+ } nsInputState;
39
+
40
+ class nsUniversalDetector { // Charset detector fed with byte chunks; delegates to up to NUM_OF_CHARSET_PROBERS probers plus an escape-sequence prober.
41
+ public:
42
+ nsUniversalDetector();
43
+ virtual ~nsUniversalDetector();
44
+ virtual void HandleData(const char* aBuf, PRUint32 aLen); // feed aLen bytes of input from aBuf
45
+ virtual void DataEnd(void); // signal end of input and settle on a result
46
+ virtual void Reset(); // restore the initial state for reuse
47
+ virtual const char* GetCharset(void); // detected charset name, or NULL while not done
48
+
49
+ protected:
50
+ virtual void Report(const char* aCharset); // stores aCharset as the result if detection is not done yet
51
+ nsInputState mInputState; // pure ASCII, escape-sequence ASCII, or high-byte input seen so far
52
+ PRBool mDone; // true once a final result is available
53
+ PRBool mInTag; // NOTE(review): only ever assigned PR_FALSE in universal.cpp; appears unused
54
+ PRBool mStart; // true until the first HandleData() call has done its BOM check
55
+ PRBool mGotData; // true once a non-empty buffer has been received
56
+ char mLastChar; // previous non-high byte, used to spot the HZ "~{" sequence
57
+ const char * mDetectedCharset; // result name; nsnull while undecided
58
+ PRInt32 mBestGuess; // set to -1 as a signal value; NOTE(review): not otherwise used in universal.cpp
59
+
60
+ nsCharSetProber *mCharSetProbers[NUM_OF_CHARSET_PROBERS]; // multi-byte, single-byte and Latin-1 probers, created on demand
61
+ nsCharSetProber *mEscCharSetProber; // escape-sequence prober, created on demand
62
+ };
63
+
64
+ #endif
65
+
data/script/console ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ # File: script/console
3
+ irb = RUBY_PLATFORM =~ /(:?mswin|mingw)/ ? 'irb.bat' : 'irb'
4
+
5
+ libs = " -r irb/completion"
6
+ # Perhaps use a console_lib to store any extra methods I may want available in the cosole
7
+ # libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
8
+ puts "Loading charguess gem"
9
+ exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+ # Load rubigen directly; if that fails, load rubygems first and retry.
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/destroy'
11
+ # Discard a leading --help/-h argument, then run the destroy script with what remains of ARGV.
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+ # Load rubigen directly; if that fails, load rubygems first and retry.
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/generate'
11
+ # Discard a leading --help/-h argument, then run the generate script with what remains of ARGV.
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,47 @@
1
+ namespace :extconf do
2
+ extension = File.basename(__FILE__, '.rake')
3
+
4
+ ext = "ext/#{extension}"
5
+ ext_so = "#{ext}/#{extension}.#{Config::CONFIG['DLEXT']}"
6
+ ext_files = FileList[
7
+ "#{ext}/*.c",
8
+ "#{ext}/*.h",
9
+ "#{ext}/*.rl",
10
+ "#{ext}/extconf.rb",
11
+ "#{ext}/Makefile",
12
+ # "lib"
13
+ ]
14
+
15
+
16
+ task :compile => extension do
17
+ if Dir.glob("**/#{extension}.{o,so,dll}").length == 0
18
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
19
+ STDERR.puts "Gem actually failed to build. Your system is"
20
+ STDERR.puts "NOT configured properly to build charguess."
21
+ STDERR.puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
22
+ exit(1)
23
+ end
24
+ end
25
+
26
+ desc "Builds just the #{extension} extension"
27
+ task extension.to_sym => ["#{ext}/Makefile", ext_so ]
28
+
29
+ file "#{ext}/Makefile" => ["#{ext}/extconf.rb"] do
30
+ Dir.chdir("ext/libcharguess/cpp") do
31
+ sh("./configure")
32
+ sh("make")
33
+ end
34
+ Dir.chdir(ext) do ruby "extconf.rb" end
35
+ end
36
+
37
+ file ext_so => ext_files do
38
+ Dir.chdir(ext) do
39
+ sh(PLATFORM =~ /win32/ ? 'nmake' : 'make') do |ok, res|
40
+ if !ok
41
+ require "fileutils"
42
+ FileUtils.rm Dir.glob('*.{so,o,dll,bundle}')
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,13 @@
1
+ namespace :extconf do
2
+ desc "Compiles the Ruby extension"
3
+ task :compile
4
+ end
5
+ # Top-level :compile runs extconf:compile, and :test depends on :compile.
6
+ task :compile => "extconf:compile"
7
+
8
+ task :test => :compile
9
+ # NOTE(review): relies on $hoe being defined before this file is loaded — confirm against the Rakefile.
10
+ BIN = "*.{bundle,jar,so,obj,pdb,lib,def,exp}"
11
+ $hoe.clean_globs |= ["ext/**/#{BIN}", "lib/**/#{BIN}", 'ext/**/Makefile']
12
+ $hoe.spec.require_paths = Dir['{lib,ext/*}']
13
+ $hoe.spec.extensions = FileList["ext/**/extconf.rb"].to_a
@@ -0,0 +1,7 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+ # NOTE(review): test_helper.rb has its charguess require commented out, so CharGuess must be loaded elsewhere — confirm.
3
+ class TestCharguess < Test::Unit::TestCase
4
+ def test_charguess
5
+ assert_equal "UTF-8", CharGuess::guess("áéíóú")
6
+ end
7
+ end
@@ -0,0 +1,10 @@
1
+ require "test/unit"
2
+ # Put ext/charguess on the load path and load the compiled extension from there.
3
+ $:.unshift File.dirname(__FILE__) + "/../ext/charguess"
4
+ require "charguess.so"
5
+ # Checks that the extension reports "UTF-8" for a string of accented vowels.
6
+ class TestCharguessExtn < Test::Unit::TestCase
7
+ def test_charguess
8
+ assert_equal "UTF-8", CharGuess::guess("áéíóú")
9
+ end
10
+ end
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ #require File.dirname(__FILE__) + '/../lib/charguess'
metadata ADDED
@@ -0,0 +1,219 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: charguess
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - "Ernesto Jim\xC3\xA9nez"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-09 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: newgem
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: hoe
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.0
34
+ version:
35
+ description: |-
36
+ This gem builds and installs libcharguess and it's binding libcharguess-ruby
37
+
38
+ * libcharguess: http://libcharguess.sourceforge.net/
39
+ * libcharguess-ruby: http://raa.ruby-lang.org/project/charguess/
40
+ email:
41
+ - erjica@gmail.com
42
+ executables: []
43
+
44
+ extensions:
45
+ - ext/charguess/extconf.rb
46
+ extra_rdoc_files:
47
+ - History.txt
48
+ - Manifest.txt
49
+ - PostInstall.txt
50
+ - README.rdoc
51
+ files:
52
+ - History.txt
53
+ - Manifest.txt
54
+ - PostInstall.txt
55
+ - README.rdoc
56
+ - Rakefile
57
+ - ext/charguess/charguess.c
58
+ - ext/charguess/extconf.rb
59
+ - ext/libcharguess/cpp/.deps/EUCJPProber.Plo
60
+ - ext/libcharguess/cpp/.deps/EUCJPProber.Po
61
+ - ext/libcharguess/cpp/.deps/EUCKRProber.Plo
62
+ - ext/libcharguess/cpp/.deps/EUCKRProber.Po
63
+ - ext/libcharguess/cpp/.deps/EUCTWProber.Plo
64
+ - ext/libcharguess/cpp/.deps/EUCTWProber.Po
65
+ - ext/libcharguess/cpp/.deps/EscCharsetProber.Plo
66
+ - ext/libcharguess/cpp/.deps/EscCharsetProber.Po
67
+ - ext/libcharguess/cpp/.deps/EscSM.Plo
68
+ - ext/libcharguess/cpp/.deps/EscSM.Po
69
+ - ext/libcharguess/cpp/.deps/GB2312Prober.Plo
70
+ - ext/libcharguess/cpp/.deps/GB2312Prober.Po
71
+ - ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo
72
+ - ext/libcharguess/cpp/.deps/LangBulgarianModel.Po
73
+ - ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo
74
+ - ext/libcharguess/cpp/.deps/LangCyrillicModel.Po
75
+ - ext/libcharguess/cpp/.deps/LangGreekModel.Plo
76
+ - ext/libcharguess/cpp/.deps/LangGreekModel.Po
77
+ - ext/libcharguess/cpp/.deps/LangHungarianModel.Plo
78
+ - ext/libcharguess/cpp/.deps/LangHungarianModel.Po
79
+ - ext/libcharguess/cpp/.deps/LangThaiModel.Plo
80
+ - ext/libcharguess/cpp/.deps/LangThaiModel.Po
81
+ - ext/libcharguess/cpp/.deps/Latin1Prober.Plo
82
+ - ext/libcharguess/cpp/.deps/Latin1Prober.Po
83
+ - ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo
84
+ - ext/libcharguess/cpp/.deps/MBCSGroupProber.Po
85
+ - ext/libcharguess/cpp/.deps/MBCSSM.Plo
86
+ - ext/libcharguess/cpp/.deps/MBCSSM.Po
87
+ - ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo
88
+ - ext/libcharguess/cpp/.deps/SBCSGroupProber.Po
89
+ - ext/libcharguess/cpp/.deps/SBCharsetProber.Plo
90
+ - ext/libcharguess/cpp/.deps/SBCharsetProber.Po
91
+ - ext/libcharguess/cpp/.deps/SJISProber.Plo
92
+ - ext/libcharguess/cpp/.deps/SJISProber.Po
93
+ - ext/libcharguess/cpp/.deps/UTF8Prober.Plo
94
+ - ext/libcharguess/cpp/.deps/UTF8Prober.Po
95
+ - ext/libcharguess/cpp/.deps/big5Prober.Plo
96
+ - ext/libcharguess/cpp/.deps/big5Prober.Po
97
+ - ext/libcharguess/cpp/.deps/charDistribution.Plo
98
+ - ext/libcharguess/cpp/.deps/charDistribution.Po
99
+ - ext/libcharguess/cpp/.deps/chardet.Plo
100
+ - ext/libcharguess/cpp/.deps/chardet.Po
101
+ - ext/libcharguess/cpp/.deps/charguess.Po
102
+ - ext/libcharguess/cpp/.deps/jpCntx.Plo
103
+ - ext/libcharguess/cpp/.deps/jpCntx.Po
104
+ - ext/libcharguess/cpp/.deps/universal.Plo
105
+ - ext/libcharguess/cpp/.deps/universal.Po
106
+ - ext/libcharguess/cpp/AUTHORS
107
+ - ext/libcharguess/cpp/Big5Freq.tab
108
+ - ext/libcharguess/cpp/COPYING
109
+ - ext/libcharguess/cpp/COPYRIGHT
110
+ - ext/libcharguess/cpp/ChangeLog
111
+ - ext/libcharguess/cpp/EUCJPProber.cpp
112
+ - ext/libcharguess/cpp/EUCJPProber.h
113
+ - ext/libcharguess/cpp/EUCKRFreq.tab
114
+ - ext/libcharguess/cpp/EUCKRProber.cpp
115
+ - ext/libcharguess/cpp/EUCKRProber.h
116
+ - ext/libcharguess/cpp/EUCTWFreq.tab
117
+ - ext/libcharguess/cpp/EUCTWProber.cpp
118
+ - ext/libcharguess/cpp/EUCTWProber.h
119
+ - ext/libcharguess/cpp/EscCharsetProber.cpp
120
+ - ext/libcharguess/cpp/EscCharsetProber.h
121
+ - ext/libcharguess/cpp/EscSM.cpp
122
+ - ext/libcharguess/cpp/GB2312Freq.tab
123
+ - ext/libcharguess/cpp/GB2312Prober.cpp
124
+ - ext/libcharguess/cpp/GB2312Prober.h
125
+ - ext/libcharguess/cpp/INSTALL
126
+ - ext/libcharguess/cpp/JISFreq.tab
127
+ - ext/libcharguess/cpp/LICENSE
128
+ - ext/libcharguess/cpp/LangBulgarianModel.cpp
129
+ - ext/libcharguess/cpp/LangCyrillicModel.cpp
130
+ - ext/libcharguess/cpp/LangGreekModel.cpp
131
+ - ext/libcharguess/cpp/LangHungarianModel.cpp
132
+ - ext/libcharguess/cpp/LangThaiModel.cpp
133
+ - ext/libcharguess/cpp/Latin1Prober.cpp
134
+ - ext/libcharguess/cpp/Latin1Prober.h
135
+ - ext/libcharguess/cpp/MBCSGroupProber.cpp
136
+ - ext/libcharguess/cpp/MBCSGroupProber.h
137
+ - ext/libcharguess/cpp/MBCSSM.cpp
138
+ - ext/libcharguess/cpp/Makefile.am
139
+ - ext/libcharguess/cpp/Makefile.in
140
+ - ext/libcharguess/cpp/NEWS
141
+ - ext/libcharguess/cpp/README
142
+ - ext/libcharguess/cpp/SBCSGroupProber.cpp
143
+ - ext/libcharguess/cpp/SBCSGroupProber.h
144
+ - ext/libcharguess/cpp/SBCharsetProber.cpp
145
+ - ext/libcharguess/cpp/SBCharsetProber.h
146
+ - ext/libcharguess/cpp/SJISProber.cpp
147
+ - ext/libcharguess/cpp/SJISProber.h
148
+ - ext/libcharguess/cpp/UTF8Prober.cpp
149
+ - ext/libcharguess/cpp/UTF8Prober.h
150
+ - ext/libcharguess/cpp/aclocal.m4
151
+ - ext/libcharguess/cpp/autogen.sh
152
+ - ext/libcharguess/cpp/big5Prober.cpp
153
+ - ext/libcharguess/cpp/big5Prober.h
154
+ - ext/libcharguess/cpp/charDistribution.cpp
155
+ - ext/libcharguess/cpp/charDistribution.h
156
+ - ext/libcharguess/cpp/charguess.cpp
157
+ - ext/libcharguess/cpp/charguess.h
158
+ - ext/libcharguess/cpp/charsetProber.h
159
+ - ext/libcharguess/cpp/codingStateMachine.h
160
+ - ext/libcharguess/cpp/config.h
161
+ - ext/libcharguess/cpp/config.h.in
162
+ - ext/libcharguess/cpp/config.status
163
+ - ext/libcharguess/cpp/configure
164
+ - ext/libcharguess/cpp/configure.in
165
+ - ext/libcharguess/cpp/depcomp
166
+ - ext/libcharguess/cpp/fix_copyright
167
+ - ext/libcharguess/cpp/install-sh
168
+ - ext/libcharguess/cpp/jpCntx.cpp
169
+ - ext/libcharguess/cpp/jpCntx.h
170
+ - ext/libcharguess/cpp/missing
171
+ - ext/libcharguess/cpp/mkinstalldirs
172
+ - ext/libcharguess/cpp/pkgInt.h
173
+ - ext/libcharguess/cpp/stamp-h1
174
+ - ext/libcharguess/cpp/test/test.cpp
175
+ - ext/libcharguess/cpp/types.h
176
+ - ext/libcharguess/cpp/universal.cpp
177
+ - ext/libcharguess/cpp/universal.h
178
+ - script/console
179
+ - script/destroy
180
+ - script/generate
181
+ - tasks/extconf.rake
182
+ - tasks/extconf/charguess.rake
183
+ - test/test_charguess.rb
184
+ - test/test_charguess_extn.rb
185
+ - test/test_helper.rb
186
+ has_rdoc: true
187
+ homepage: http://github.com/ernesto-jimenez/charguess
188
+ licenses: []
189
+
190
+ post_install_message: PostInstall.txt
191
+ rdoc_options:
192
+ - --main
193
+ - README.rdoc
194
+ require_paths:
195
+ - ext/charguess
196
+ - ext/libcharguess
197
+ required_ruby_version: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: "0"
202
+ version:
203
+ required_rubygems_version: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - ">="
206
+ - !ruby/object:Gem::Version
207
+ version: "0"
208
+ version:
209
+ requirements: []
210
+
211
+ rubyforge_project: charguess
212
+ rubygems_version: 1.3.5
213
+ signing_key:
214
+ specification_version: 3
215
+ summary: "This gem builds and installs libcharguess and it's binding libcharguess-ruby * libcharguess: http://libcharguess.sourceforge.net/ * libcharguess-ruby: http://raa.ruby-lang.org/project/charguess/"
216
+ test_files:
217
+ - test/test_charguess.rb
218
+ - test/test_charguess_extn.rb
219
+ - test/test_helper.rb