@datawrapper/jschardet 3.0.1-0 → 3.0.1-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/jschardet.js +1 -1
- package/dist/jschardet.min.js +6 -6
- package/package.json +1 -1
- package/src/universaldetector.js +1 -1
package/dist/jschardet.js
CHANGED
@@ -7541,7 +7541,7 @@ const supportedEncodings = (function() {
 })();
 
 const supportedEncodingsDenormalized = (function() {
-denormalizedEncodings = [];
+const denormalizedEncodings = [];
 for (const encoding of supportedEncodings) {
 denormalizedEncodings.push(
 encoding.toLocaleLowerCase(),
package/dist/jschardet.min.js
CHANGED
|
@@ -656,12 +656,12 @@ c=a("./hebrewprober"),e=a("./langcyrillicmodel"),f=a("./langgreekmodel"),n=a("./
|
|
|
656
656
|
this._mContextAnalyzer.reset()};this.getCharsetName=function(){return"SHIFT_JIS"};this.feed=function(p){for(var t=p.length,m=0;m<t;m++){var q=this._mCodingSM.nextState(p[m]);if(q==f.error){n.log(this.getCharsetName()+" prober hit error at byte "+m+"\n");this._mState=f.notMe;break}else if(q==f.itsMe){this._mState=f.foundIt;break}else q==f.start&&(q=this._mCodingSM.getCurrentCharLen(),0==m?(this._mLastChar[1]=p[0],this._mContextAnalyzer.feed(this._mLastChar.slice(2-q).join(""),q),this._mDistributionAnalyzer.feed(this._mLastChar.join(""),
|
|
657
657
|
q)):(this._mContextAnalyzer.feed(p.slice(m+1-q,m+3-q),q),this._mDistributionAnalyzer.feed(p.slice(m-1,m+1),q)))}this._mLastChar[0]=p[t-1];this.getState()==f.detecting&&this._mContextAnalyzer.gotEnoughData()&&this.getConfidence()>f.SHORTCUT_THRESHOLD&&(this._mState=f.foundIt);return this.getState()};this.getConfidence=function(){var p=this._mContextAnalyzer.getConfidence(),t=this._mDistributionAnalyzer.getConfidence();return Math.max(p,t)};this._mCodingSM=new h(d);this._mDistributionAnalyzer=new c;
|
|
658
658
|
this._mContextAnalyzer=new e;this.reset()}var h=a("./codingstatemachine"),k=a("./mbcharsetprober"),d=a("./mbcssm/sjis"),c=a("./chardistribution").SJISDistributionAnalysis,e=a("./jpcntx").SJISContextAnalysis,f=a("./constants"),n=a("./logger");g.prototype=new k;l.exports=g},{"./chardistribution":4,"./codingstatemachine":7,"./constants":8,"./jpcntx":21,"./logger":29,"./mbcharsetprober":30,"./mbcssm/sjis":37}],42:[function(a,l,b){var g=a("./constants"),h=a("./mbcsgroupprober"),k=a("./sbcsgroupprober"),
|
|
659
|
-
d=a("./latin1prober"),c=a("./escprober"),e=a("./logger");const f=function(){const p=[new c,new h,new k,new d],t="UTF-8 UTF-32LE UTF-32BE UTF-32BE UTF-16LE UTF-16BE X-ISO-10646-UCS-4-3412 X-ISO-10646-UCS-4-2143".split(" ").slice(0);for(const m of p)[].push.apply(t,m.getSupportedCharsetNames());return t}(),n=function(){
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
0
|
|
664
|
-
this.
|
|
659
|
+
d=a("./latin1prober"),c=a("./escprober"),e=a("./logger");const f=function(){const p=[new c,new h,new k,new d],t="UTF-8 UTF-32LE UTF-32BE UTF-32BE UTF-16LE UTF-16BE X-ISO-10646-UCS-4-3412 X-ISO-10646-UCS-4-2143".split(" ").slice(0);for(const m of p)[].push.apply(t,m.getSupportedCharsetNames());return t}(),n=function(){const p=[];for(const t of f)p.push(t.toLocaleLowerCase(),t.toLocaleLowerCase().replace(/-/g,""));return p}();l.exports=function(p){function t(m){return p.detectEncodings?p.detectEncodings.includes(m.toLowerCase()):
|
|
660
|
+
!0}p||={};p.minimumThreshold||(p.minimumThreshold=.2);if(p.detectEncodings)for(const m of p.detectEncodings)if(!n.includes(m.toLowerCase()))throw Error(`Encoding ${m} is not supported. Supported encodings: ${f}.`);this.reset=function(){this.result={encoding:null,confidence:0};this.results=[];this.done=!1;this._mStart=!0;this._mGotData=!1;this._mInputState=0;this._mLastChar=[];this._mBOM="";this._mEscCharsetProber&&this._mEscCharsetProber.reset();for(var m=0,q;q=this._mCharsetProbers[m];m++)q.reset()};
|
|
661
|
+
this.feed=function(m){if(!this.done&&m.length)if(this._mGotData||(this._mBOM+=m,"\u00ef\u00bb\u00bf"==this._mBOM.slice(0,3)&&t("UTF-8")?this.result={encoding:"UTF-8",confidence:1}:"\u00ff\u00fe\x00\x00"==this._mBOM.slice(0,4)&&t("UTF-32LE")?this.result={encoding:"UTF-32LE",confidence:1}:"\x00\x00\u00fe\u00ff"==this._mBOM.slice(0,4)&&t("UTF-32BE")?this.result={encoding:"UTF-32BE",confidence:1}:"\u00fe\u00ff\x00\x00"==this._mBOM.slice(0,4)&&t("X-ISO-10646-UCS-4-3412")?this.result={encoding:"X-ISO-10646-UCS-4-3412",
|
|
662
|
+
confidence:1}:"\x00\x00\u00ff\u00fe"==this._mBOM.slice(0,4)&&t("X-ISO-10646-UCS-4-2143")?this.result={encoding:"X-ISO-10646-UCS-4-2143",confidence:1}:"\u00ff\u00fe"==this._mBOM.slice(0,2)&&t("UTF-16LE")?this.result={encoding:"UTF-16LE",confidence:1}:"\u00fe\u00ff"==this._mBOM.slice(0,2)&&t("UTF-16BE")&&(this.result={encoding:"UTF-16BE",confidence:1}),0<this.result.confidence&&(this.results=[this.result]),3<this._mBOM.length&&(this._mGotData=!0)),this.result.encoding&&0<this.result.confidence)this.done=
|
|
663
|
+
!0;else if(0==this._mInputState&&(this._highBitDetector.test(m)?this._mInputState=2:this._escDetector.test(this._mLastChar.join("")+m)&&(this._mInputState=1)),this._mLastChar=m.slice(-1).split(""),1==this._mInputState)this._mEscCharsetProber||(this._mEscCharsetProber=new c),this._mEscCharsetProber.feed(m)==g.foundIt&&t(this._mEscCharsetProber.getCharsetName())&&(this.result={encoding:this._mEscCharsetProber.getCharsetName(),confidence:this._mEscCharsetProber.getConfidence()},this.results=[this.result],
|
|
664
|
+
this.done=!0);else if(2==this._mInputState){0==this._mCharsetProbers.length&&(this._mCharsetProbers=[new h,new k,new d]);for(var q=0,r;r=this._mCharsetProbers[q];q++)if(r.feed(m)==g.foundIt&&t(r.getCharsetName())){this.result={encoding:r.getCharsetName(),confidence:r.getConfidence()};this.results=[this.result];this.done=!0;break}}};this.close=function(){if(!this.done)if(0===this._mBOM.length)e.log("no data received!\n");else{this.done=!0;if(0==this._mInputState&&t("ascii"))return e.log("pure ascii"),
|
|
665
665
|
this.result={encoding:"ascii",confidence:1},this.results.push(this.result),this.result;if(2==this._mInputState){for(var m=0,q;q=this._mCharsetProbers[m];m++)q&&q.getCharsetName()&&t(q.getCharsetName())&&(this.results.push({encoding:q.getCharsetName(),confidence:q.getConfidence()}),e.log(q.getCharsetName()+" confidence "+q.getConfidence()));this.results.sort(function(r,u){return u.confidence-r.confidence});if(0<this.results.length&&(m=this.results[0],m.confidence>=p.minimumThreshold))return this.result=
|
|
666
666
|
m}if(e.enabled)for(e.log("no probers hit minimum threshhold\n"),m=0;q=this._mCharsetProbers[m];m++)q&&t(q.getCharsetName())&&e.log(q.getCharsetName()+" confidence = "+q.getConfidence()+"\n")}};this._highBitDetector=/[\x80-\xFF]/;this._escDetector=/(\x1B|~\{)/;this._mEscCharsetProber=null;this._mCharsetProbers=[];this.reset()}},{"./constants":8,"./escprober":9,"./latin1prober":28,"./logger":29,"./mbcsgroupprober":31,"./sbcsgroupprober":40}],43:[function(a,l,b){function g(){k.apply(this);this.reset=
|
|
667
667
|
function(){g.prototype.reset.apply(this);this._mCodingSM.reset();this._mBasicAsciiLen=this._mFullLen=this._mMBCharLen=this._mNumOfMBChar=0};this.getCharsetName=function(){return"UTF-8"};this.feed=function(e){this._mFullLen+=e.length;for(var f=0,n;f<e.length;f++){n=e[f];var p=this._mCodingSM.nextState(n);if(p==d.error){this._mState=d.notMe;break}else if(p==d.itsMe){this._mState=d.foundIt;break}else p==d.start&&(2<=this._mCodingSM.getCurrentCharLen()?(this._mNumOfMBChar++,this._mMBCharLen+=this._mCodingSM.getCurrentCharLen()):
|
package/package.json
CHANGED
package/src/universaldetector.js
CHANGED
@@ -57,7 +57,7 @@ const supportedEncodings = (function() {
 })();
 
 const supportedEncodingsDenormalized = (function() {
-denormalizedEncodings = [];
+const denormalizedEncodings = [];
 for (const encoding of supportedEncodings) {
 denormalizedEncodings.push(
 encoding.toLocaleLowerCase(),