github-linguist 5.3.1 → 5.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/linguist/extconf.rb +3 -0
- data/ext/linguist/lex.linguist_yy.c +8269 -0
- data/ext/linguist/lex.linguist_yy.h +353 -0
- data/ext/linguist/linguist.c +64 -0
- data/ext/linguist/linguist.h +11 -0
- data/ext/linguist/tokenizer.l +119 -0
- data/grammars/source.coffee.json +123 -41
- data/grammars/source.crystal.json +2 -2
- data/grammars/source.css.less.json +319 -27
- data/grammars/source.glsl.json +1 -1
- data/grammars/source.js.json +6 -2
- data/grammars/source.meson.json +1 -1
- data/grammars/source.tsx.json +4 -14
- data/grammars/source.wdl.json +2 -2
- data/grammars/text.roff.json +155 -41
- data/grammars/text.shell-session.json +1 -1
- data/lib/linguist/blob_helper.rb +47 -4
- data/lib/linguist/classifier.rb +3 -1
- data/lib/linguist/file_blob.rb +3 -3
- data/lib/linguist/heuristics.rb +15 -6
- data/lib/linguist/linguist.bundle +0 -0
- data/lib/linguist/samples.json +49989 -44225
- data/lib/linguist/strategy/modeline.rb +2 -2
- data/lib/linguist/tokenizer.rb +1 -186
- data/lib/linguist/version.rb +1 -1
- metadata +25 -3
data/grammars/source.glsl.json
CHANGED
@@ -81,7 +81,7 @@
|
|
81
81
|
"name": "storage.modifier.glsl"
|
82
82
|
},
|
83
83
|
{
|
84
|
-
"match": "\\b(gl_BackColor|gl_BackLightModelProduct|gl_BackLightProduct|gl_BackMaterial|gl_BackSecondaryColor|gl_ClipDistance|gl_ClipPlane|gl_ClipVertex|gl_Color|gl_DepthRange|gl_DepthRangeParameters|gl_EyePlaneQ|gl_EyePlaneR|gl_EyePlaneS|gl_EyePlaneT|gl_Fog|gl_FogCoord|gl_FogFragCoord|gl_FogParameters|gl_FragColor|gl_FragCoord|
|
84
|
+
"match": "\\b(gl_BackColor|gl_BackLightModelProduct|gl_BackLightProduct|gl_BackMaterial|gl_BackSecondaryColor|gl_ClipDistance|gl_ClipPlane|gl_ClipVertex|gl_Color|gl_DepthRange|gl_DepthRangeParameters|gl_EyePlaneQ|gl_EyePlaneR|gl_EyePlaneS|gl_EyePlaneT|gl_Fog|gl_FogCoord|gl_FogFragCoord|gl_FogParameters|gl_FragColor|gl_FragCoord|gl_FragData|gl_FragDepth|gl_FrontColor|gl_FrontFacing|gl_FrontLightModelProduct|gl_FrontLightProduct|gl_FrontMaterial|gl_FrontSecondaryColor|gl_InstanceID|gl_Layer|gl_LightModel|gl_LightModelParameters|gl_LightModelProducts|gl_LightProducts|gl_LightSource|gl_LightSourceParameters|gl_MaterialParameters|gl_ModelViewMatrix|gl_ModelViewMatrixInverse|gl_ModelViewMatrixInverseTranspose|gl_ModelViewMatrixTranspose|gl_ModelViewProjectionMatrix|gl_ModelViewProjectionMatrixInverse|gl_ModelViewProjectionMatrixInverseTranspose|gl_ModelViewProjectionMatrixTranspose|gl_MultiTexCoord[0-7]|gl_Normal|gl_NormalMatrix|gl_NormalScale|gl_ObjectPlaneQ|gl_ObjectPlaneR|gl_ObjectPlaneS|gl_ObjectPlaneT|gl_Point|gl_PointCoord|gl_PointParameters|gl_PointSize|gl_Position|gl_PrimitiveIDIn|gl_ProjectionMatrix|gl_ProjectionMatrixInverse|gl_ProjectionMatrixInverseTranspose|gl_ProjectionMatrixTranspose|gl_SecondaryColor|gl_TexCoord|gl_TextureEnvColor|gl_TextureMatrix|gl_TextureMatrixInverse|gl_TextureMatrixInverseTranspose|gl_TextureMatrixTranspose|gl_Vertex|gl_VertexID)\\b",
|
85
85
|
"name": "support.variable.glsl"
|
86
86
|
},
|
87
87
|
{
|
data/grammars/source.js.json
CHANGED
@@ -467,7 +467,7 @@
|
|
467
467
|
]
|
468
468
|
},
|
469
469
|
{
|
470
|
-
"begin": "(?x)\n(?=\n (?!\n (break|case|catch|continue|do|else|finally|for|function|if|\n
|
470
|
+
"begin": "(?x)\n(?=\n (?!\n (break|case|catch|continue|do|else|finally|for|function|if|\n return|switch|throw|try|while|with)\n [\\s\\(]\n )\n (\n \\b(get|set) # Property getter/setter: get foo(){}\n (?:\\s+|(?=\\[)) # Followed by whitespace or square bracket\n )?+\n ( # Method name\n \\b[a-zA-Z_$][\\w$]* # Fixed name\n |\n \\[ # Computed property key\n [^\\[\\]]++ # Contains at least one non-brace character\n \\]\n )\n \\s*\\(\\s* # Start of arguments list\n (\n \"[^\"]*\" | # Double-quoted string\n '[^']*' | # Single-quoted string\n [^\"()'] # Any non-bracket or non-quote\n )*\n \\)\\s* # End of arguments\n { # Beginning of body\n)",
|
471
471
|
"end": "(?<=})",
|
472
472
|
"patterns": [
|
473
473
|
{
|
@@ -899,7 +899,11 @@
|
|
899
899
|
"name": "meta.control.yield.js"
|
900
900
|
},
|
901
901
|
{
|
902
|
-
"match": "(?<!\\.)\\b(await
|
902
|
+
"match": "(?:(?<=\\.{3})|(?<!\\.))\\b(await)(?!\\s*:)\\b",
|
903
|
+
"name": "keyword.control.js"
|
904
|
+
},
|
905
|
+
{
|
906
|
+
"match": "(?<!\\.)\\b(break|catch|continue|do|else|finally|for|if|import|package|return|throw|try|while|with)(?!\\s*:)\\b",
|
903
907
|
"name": "keyword.control.js"
|
904
908
|
},
|
905
909
|
{
|
data/grammars/source.meson.json
CHANGED
@@ -60,7 +60,7 @@
|
|
60
60
|
"name": "keyword.operator.arithmetic.meson"
|
61
61
|
},
|
62
62
|
{
|
63
|
-
"match": "(?x)\\b(add_global_arguments|add_global_link_arguments|
|
63
|
+
"match": "(?x)\\b(add_global_arguments|add_project_arguments|add_global_link_arguments|add_project_link_arguments|add_test_setup|add_languages|assert|benchmark|build_target|configuration_data|configure_file|custom_target|declare_dependency|dependency|environment|error|executable|generator|gettext|get_option|get_variable|files|find_library|find_program|include_directories|import|install_data|install_headers|install_man|install_subdir|is_variable|jar|join_paths|library|message|option|project|run_target|run_command|set_variable|subdir|subproject|shared_library|shared_module|static_library|test|vcs_tag\n)\\b\\s*(?=\\()",
|
64
64
|
"name": "support.function.builtin.meson"
|
65
65
|
}
|
66
66
|
],
|
data/grammars/source.tsx.json
CHANGED
@@ -27,15 +27,12 @@
|
|
27
27
|
},
|
28
28
|
{
|
29
29
|
"include": "#jsx-tag-in-expression"
|
30
|
-
},
|
31
|
-
{
|
32
|
-
"include": "#jsx-tag-invalid"
|
33
30
|
}
|
34
31
|
]
|
35
32
|
},
|
36
33
|
"jsx-tag-without-attributes-in-expression": {
|
37
|
-
"begin": "(?x)\n (?<=[({\\[,?=>:*]|&&|\\|\\||\\?|\\Wreturn|^return|\\Wdefault|^)\\s*\n (?=(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
|
38
|
-
"end": "(?!\\s*(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
|
34
|
+
"begin": "(?x)\n (?<=[({\\[,?=>:*]|&&|\\|\\||\\?|\\Wreturn|^return|\\Wdefault|^)\\s*\n (?=(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>))",
|
35
|
+
"end": "(?!\\s*(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>))",
|
39
36
|
"patterns": [
|
40
37
|
{
|
41
38
|
"include": "#jsx-tag-without-attributes"
|
@@ -44,8 +41,8 @@
|
|
44
41
|
},
|
45
42
|
"jsx-tag-without-attributes": {
|
46
43
|
"name": "meta.tag.without-attributes.tsx",
|
47
|
-
"begin": "(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
|
48
|
-
"end": "(</)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
|
44
|
+
"begin": "(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>)",
|
45
|
+
"end": "(</)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>)",
|
49
46
|
"beginCaptures": {
|
50
47
|
"1": {
|
51
48
|
"name": "punctuation.definition.tag.begin.tsx"
|
@@ -188,10 +185,6 @@
|
|
188
185
|
}
|
189
186
|
]
|
190
187
|
},
|
191
|
-
"jsx-tag-invalid": {
|
192
|
-
"name": "invalid.illegal.tag.incomplete.tsx",
|
193
|
-
"match": "<\\s*>"
|
194
|
-
},
|
195
188
|
"jsx-children": {
|
196
189
|
"patterns": [
|
197
190
|
{
|
@@ -200,9 +193,6 @@
|
|
200
193
|
{
|
201
194
|
"include": "#jsx-child-tag"
|
202
195
|
},
|
203
|
-
{
|
204
|
-
"include": "#jsx-tag-invalid"
|
205
|
-
},
|
206
196
|
{
|
207
197
|
"include": "#jsx-evaluated-code"
|
208
198
|
},
|
data/grammars/source.wdl.json
CHANGED
@@ -48,7 +48,7 @@
|
|
48
48
|
"repository": {
|
49
49
|
"builtin_types": {
|
50
50
|
"name": "support.type.wdl",
|
51
|
-
"match": "(?<!\\.)\\b(Array|Boolean|File|Float|Int|Map|Object|String|
|
51
|
+
"match": "(?<!\\.)\\b(Array|Boolean|File|Float|Int|Map|Object|String|Pair)\\b"
|
52
52
|
},
|
53
53
|
"comments": {
|
54
54
|
"patterns": [
|
@@ -129,7 +129,7 @@
|
|
129
129
|
"patterns": [
|
130
130
|
{
|
131
131
|
"name": "keyword.other.wdl",
|
132
|
-
"match": "\\s*(call|command|output|runtime|task|workflow|if|then|else|import|as|input|output|meta|parameter_meta)\\s+"
|
132
|
+
"match": "\\s*(call|command|output|runtime|task|workflow|if|then|else|import|as|input|output|meta|parameter_meta|scatter)\\s+"
|
133
133
|
}
|
134
134
|
]
|
135
135
|
},
|
data/grammars/text.roff.json
CHANGED
@@ -492,6 +492,9 @@
|
|
492
492
|
},
|
493
493
|
"contentName": "string.unquoted.roff",
|
494
494
|
"patterns": [
|
495
|
+
{
|
496
|
+
"include": "#escapes-clipped"
|
497
|
+
},
|
495
498
|
{
|
496
499
|
"include": "#escapes"
|
497
500
|
}
|
@@ -751,7 +754,7 @@
|
|
751
754
|
"definition": {
|
752
755
|
"patterns": [
|
753
756
|
{
|
754
|
-
"name": "meta.
|
757
|
+
"name": "meta.macro.definition.$2.roff",
|
755
758
|
"begin": "^([.'])[ \t]*((dei?1?)|(ami?1?))\\s+(\\S+?)?\\s*(\\\\[\"#].*)?$",
|
756
759
|
"end": "^(?:[ \t]*\\x5C{2})?\\.[ \t]*\\.",
|
757
760
|
"beginCaptures": {
|
@@ -794,7 +797,7 @@
|
|
794
797
|
]
|
795
798
|
},
|
796
799
|
{
|
797
|
-
"name": "meta.
|
800
|
+
"name": "meta.macro.definition.with-terminator.$2.roff",
|
798
801
|
"begin": "^([.'])[ \t]*((dei?1?)|(ami?1?))\\s+(\\S+)\\s*(\"[^\"]+\"?|\\S+?(?=\\s|\\\\[\"#]))?(.*)$",
|
799
802
|
"end": "^(\\.)[ \t]*((\\6)(?=$|\\s|\\\\(?:$|\")))",
|
800
803
|
"beginCaptures": {
|
@@ -1139,14 +1142,7 @@
|
|
1139
1142
|
}
|
1140
1143
|
},
|
1141
1144
|
{
|
1142
|
-
"
|
1143
|
-
"begin": "(\\\\)$\\n?",
|
1144
|
-
"end": "^(?:[.'])?",
|
1145
|
-
"beginCaptures": {
|
1146
|
-
"1": {
|
1147
|
-
"name": "punctuation.definition.escape.roff"
|
1148
|
-
}
|
1149
|
-
}
|
1145
|
+
"include": "#continuous-newline"
|
1150
1146
|
},
|
1151
1147
|
{
|
1152
1148
|
"include": "#register-expansion"
|
@@ -1578,6 +1574,121 @@
|
|
1578
1574
|
}
|
1579
1575
|
]
|
1580
1576
|
},
|
1577
|
+
"continuous-newline": {
|
1578
|
+
"begin": "(\\\\)?(\\\\)$\\n?",
|
1579
|
+
"end": "^(?:[.'])?",
|
1580
|
+
"beginCaptures": {
|
1581
|
+
"0": {
|
1582
|
+
"name": "constant.character.escape.newline.roff"
|
1583
|
+
},
|
1584
|
+
"1": {
|
1585
|
+
"name": "punctuation.definition.concealed.escape.backslash.roff"
|
1586
|
+
},
|
1587
|
+
"2": {
|
1588
|
+
"name": "punctuation.definition.escape.roff"
|
1589
|
+
}
|
1590
|
+
}
|
1591
|
+
},
|
1592
|
+
"escapes-clipped": {
|
1593
|
+
"patterns": [
|
1594
|
+
{
|
1595
|
+
"begin": "\\\\f(?:[I2]|\\(CI|\\[\\s*(?:[I2]|CI)\\s*\\])",
|
1596
|
+
"end": "$|(?=\\\\f[\\[A-Za-z0-9])",
|
1597
|
+
"beginCaptures": {
|
1598
|
+
"0": {
|
1599
|
+
"patterns": [
|
1600
|
+
{
|
1601
|
+
"include": "#escapes"
|
1602
|
+
}
|
1603
|
+
]
|
1604
|
+
}
|
1605
|
+
},
|
1606
|
+
"patterns": [
|
1607
|
+
{
|
1608
|
+
"include": "#escaped-newline"
|
1609
|
+
},
|
1610
|
+
{
|
1611
|
+
"include": "$self"
|
1612
|
+
},
|
1613
|
+
{
|
1614
|
+
"include": "#italic-word"
|
1615
|
+
}
|
1616
|
+
]
|
1617
|
+
},
|
1618
|
+
{
|
1619
|
+
"begin": "\\\\f(?:[B3]|\\(CB|\\[\\s*(?:[B3]|CB)\\s*\\])",
|
1620
|
+
"end": "$|(?=\\\\f[\\[A-Za-z0-9])",
|
1621
|
+
"beginCaptures": {
|
1622
|
+
"0": {
|
1623
|
+
"patterns": [
|
1624
|
+
{
|
1625
|
+
"include": "#escapes"
|
1626
|
+
}
|
1627
|
+
]
|
1628
|
+
}
|
1629
|
+
},
|
1630
|
+
"patterns": [
|
1631
|
+
{
|
1632
|
+
"include": "#escaped-newline"
|
1633
|
+
},
|
1634
|
+
{
|
1635
|
+
"include": "$self"
|
1636
|
+
},
|
1637
|
+
{
|
1638
|
+
"include": "#bold-word"
|
1639
|
+
}
|
1640
|
+
]
|
1641
|
+
},
|
1642
|
+
{
|
1643
|
+
"begin": "\\\\f(?:4|\\(BI|\\[\\s*BI\\s*\\])",
|
1644
|
+
"end": "$|(?=\\\\f[\\[A-Za-z0-9])",
|
1645
|
+
"beginCaptures": {
|
1646
|
+
"0": {
|
1647
|
+
"patterns": [
|
1648
|
+
{
|
1649
|
+
"include": "#escapes"
|
1650
|
+
}
|
1651
|
+
]
|
1652
|
+
}
|
1653
|
+
},
|
1654
|
+
"patterns": [
|
1655
|
+
{
|
1656
|
+
"include": "#escaped-newline"
|
1657
|
+
},
|
1658
|
+
{
|
1659
|
+
"include": "$self"
|
1660
|
+
},
|
1661
|
+
{
|
1662
|
+
"include": "#bold-italic-word"
|
1663
|
+
}
|
1664
|
+
]
|
1665
|
+
},
|
1666
|
+
{
|
1667
|
+
"begin": "\\\\f(?:\\(C[WR]|\\[\\s*C[WR]\\s*\\])",
|
1668
|
+
"end": "$|(?=\\\\f[\\[A-Za-z0-9])",
|
1669
|
+
"beginCaptures": {
|
1670
|
+
"0": {
|
1671
|
+
"patterns": [
|
1672
|
+
{
|
1673
|
+
"include": "#escapes"
|
1674
|
+
}
|
1675
|
+
]
|
1676
|
+
}
|
1677
|
+
},
|
1678
|
+
"patterns": [
|
1679
|
+
{
|
1680
|
+
"include": "#escaped-newline"
|
1681
|
+
},
|
1682
|
+
{
|
1683
|
+
"include": "$self"
|
1684
|
+
},
|
1685
|
+
{
|
1686
|
+
"include": "#monospace-word"
|
1687
|
+
}
|
1688
|
+
]
|
1689
|
+
}
|
1690
|
+
]
|
1691
|
+
},
|
1581
1692
|
"escapes-full": {
|
1582
1693
|
"patterns": [
|
1583
1694
|
{
|
@@ -2406,6 +2517,34 @@
|
|
2406
2517
|
}
|
2407
2518
|
]
|
2408
2519
|
},
|
2520
|
+
{
|
2521
|
+
"name": "meta.shell-snippet.mdoc.roff",
|
2522
|
+
"begin": "^(?:\\S*.*?\\s+)?(?i:Bash|(?:Bourne[\\s-]?)?Shell(?:[\\s-]?Script)?):\\s*$\\n?",
|
2523
|
+
"end": "^(?!\\t|\\s*$)",
|
2524
|
+
"beginCaptures": {
|
2525
|
+
"0": {
|
2526
|
+
"patterns": [
|
2527
|
+
{
|
2528
|
+
"include": "#main"
|
2529
|
+
}
|
2530
|
+
]
|
2531
|
+
}
|
2532
|
+
},
|
2533
|
+
"patterns": [
|
2534
|
+
{
|
2535
|
+
"match": ".+",
|
2536
|
+
"captures": {
|
2537
|
+
"0": {
|
2538
|
+
"patterns": [
|
2539
|
+
{
|
2540
|
+
"include": "source.shell"
|
2541
|
+
}
|
2542
|
+
]
|
2543
|
+
}
|
2544
|
+
}
|
2545
|
+
}
|
2546
|
+
]
|
2547
|
+
},
|
2409
2548
|
{
|
2410
2549
|
"include": "#main"
|
2411
2550
|
}
|
@@ -3349,7 +3488,7 @@
|
|
3349
3488
|
]
|
3350
3489
|
},
|
3351
3490
|
{
|
3352
|
-
"begin": "^([.'])[ \t]*(TP)(?=\\s|\\\\[\"#])(.*)?$\\n?",
|
3491
|
+
"begin": "^([.'])[ \t]*(TP|TQ)(?=\\s|\\\\[\"#])(.*)?$\\n?",
|
3353
3492
|
"end": "^(.*)(?<!\\\\)$",
|
3354
3493
|
"patterns": [
|
3355
3494
|
{
|
@@ -3384,42 +3523,17 @@
|
|
3384
3523
|
}
|
3385
3524
|
},
|
3386
3525
|
"endCaptures": {
|
3387
|
-
"1": {
|
3388
|
-
"patterns": [
|
3389
|
-
{
|
3390
|
-
"include": "$self"
|
3391
|
-
}
|
3392
|
-
]
|
3393
|
-
}
|
3394
|
-
}
|
3395
|
-
},
|
3396
|
-
{
|
3397
|
-
"name": "markup.list.unnumbered.man.macro.gnu.roff",
|
3398
|
-
"begin": "^([.'])[ \t]*(TQ)[ \t]*(\\\\[#\"].*)?$",
|
3399
|
-
"end": "^(?=[.'][ \t]*TP(?:\\s|\\\\[#\"]))",
|
3400
|
-
"beginCaptures": {
|
3401
3526
|
"0": {
|
3402
|
-
"name": "
|
3527
|
+
"name": "markup.heading.paragraph.roff"
|
3403
3528
|
},
|
3404
3529
|
"1": {
|
3405
|
-
"name": "punctuation.definition.macro.roff"
|
3406
|
-
},
|
3407
|
-
"2": {
|
3408
|
-
"name": "entity.function.name.gnu.roff"
|
3409
|
-
},
|
3410
|
-
"3": {
|
3411
3530
|
"patterns": [
|
3412
3531
|
{
|
3413
|
-
"include": "
|
3532
|
+
"include": "$self"
|
3414
3533
|
}
|
3415
3534
|
]
|
3416
3535
|
}
|
3417
|
-
}
|
3418
|
-
"patterns": [
|
3419
|
-
{
|
3420
|
-
"include": "$self"
|
3421
|
-
}
|
3422
|
-
]
|
3536
|
+
}
|
3423
3537
|
},
|
3424
3538
|
{
|
3425
3539
|
"name": "meta.deprecated.function.hanging-paragraph.man.macro.roff",
|
@@ -4827,8 +4941,8 @@
|
|
4827
4941
|
}
|
4828
4942
|
},
|
4829
4943
|
{
|
4830
|
-
"begin": "^([.'])[ \t]*(\\[)(
|
4831
|
-
"end": "^([.'])[ \t]*(\\])(
|
4944
|
+
"begin": "^([.'])[ \t]*(\\[)\\s*([-$'\\w.\\\\]*?)\\s*(\\\\[\"#].*)?$",
|
4945
|
+
"end": "^([.'])[ \t]*(\\])\\s*([-$'\\w.\\\\]*?)(?=\\s|$|\\\\\")",
|
4832
4946
|
"contentName": "meta.citation.roff",
|
4833
4947
|
"patterns": [
|
4834
4948
|
{
|
data/grammars/text.shell-session.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
],
|
7
7
|
"patterns": [
|
8
8
|
{
|
9
|
-
"match": "(?x) ^ (?: ( (?:\\(\\S+\\))? (?: sh\\S*? | \\w+\\S+[@:]\\S+(?:\\s+\\S+)? | \\[\\S+[@:][^\\n]+\\].+ ) ) \\s* )? ( [>$#%] ) \\s+ (.*) $",
|
9
|
+
"match": "(?x) ^ (?: ( (?:\\(\\S+\\))? (?: sh\\S*? | \\w+\\S+[@:]\\S+(?:\\s+\\S+)? | \\[\\S+[@:][^\\n]+\\].+ ) ) \\s* )? ( [>$#%] | \\p{Greek} ) \\s+ (.*) $",
|
10
10
|
"captures": {
|
11
11
|
"1": {
|
12
12
|
"name": "entity.other.prompt-prefix.shell-session"
|
data/lib/linguist/blob_helper.rb
CHANGED
@@ -275,10 +275,8 @@ module Linguist
|
|
275
275
|
# also--importantly--without having to duplicate many (potentially
|
276
276
|
# large) strings.
|
277
277
|
begin
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
data.split(Regexp.union(encoded_newlines), -1)
|
278
|
+
|
279
|
+
data.split(encoded_newlines_re, -1)
|
282
280
|
rescue Encoding::ConverterNotFoundError
|
283
281
|
# The data is not splittable in the detected encoding. Assume it's
|
284
282
|
# one big line.
|
@@ -289,6 +287,51 @@ module Linguist
|
|
289
287
|
end
|
290
288
|
end
|
291
289
|
|
290
|
+
def encoded_newlines_re
|
291
|
+
@encoded_newlines_re ||= Regexp.union(["\r\n", "\r", "\n"].
|
292
|
+
map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) })
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
def first_lines(n)
|
297
|
+
return lines[0...n] if defined? @lines
|
298
|
+
return [] unless viewable? && data
|
299
|
+
|
300
|
+
i, c = 0, 0
|
301
|
+
while c < n && j = data.index(encoded_newlines_re, i)
|
302
|
+
i = j + $&.length
|
303
|
+
c += 1
|
304
|
+
end
|
305
|
+
data[0...i].split(encoded_newlines_re, -1)
|
306
|
+
end
|
307
|
+
|
308
|
+
def last_lines(n)
|
309
|
+
if defined? @lines
|
310
|
+
if n >= @lines.length
|
311
|
+
@lines
|
312
|
+
else
|
313
|
+
lines[-n..-1]
|
314
|
+
end
|
315
|
+
end
|
316
|
+
return [] unless viewable? && data
|
317
|
+
|
318
|
+
no_eol = true
|
319
|
+
i, c = data.length, 0
|
320
|
+
k = i
|
321
|
+
while c < n && j = data.rindex(encoded_newlines_re, i - 1)
|
322
|
+
if c == 0 && j + $&.length == i
|
323
|
+
no_eol = false
|
324
|
+
n += 1
|
325
|
+
end
|
326
|
+
i = j
|
327
|
+
k = j + $&.length
|
328
|
+
c += 1
|
329
|
+
end
|
330
|
+
r = data[k..-1].split(encoded_newlines_re, -1)
|
331
|
+
r.pop if !no_eol
|
332
|
+
r
|
333
|
+
end
|
334
|
+
|
292
335
|
# Public: Get number of lines of code
|
293
336
|
#
|
294
337
|
# Requires Blob#data
|
data/lib/linguist/classifier.rb
CHANGED
@@ -3,6 +3,8 @@ require 'linguist/tokenizer'
|
|
3
3
|
module Linguist
|
4
4
|
# Language bayesian classifier.
|
5
5
|
class Classifier
|
6
|
+
CLASSIFIER_CONSIDER_BYTES = 50 * 1024
|
7
|
+
|
6
8
|
# Public: Use the classifier to detect language of the blob.
|
7
9
|
#
|
8
10
|
# blob - An object that quacks like a blob.
|
@@ -17,7 +19,7 @@ module Linguist
|
|
17
19
|
# Returns an Array of Language objects, most probable first.
|
18
20
|
def self.call(blob, possible_languages)
|
19
21
|
language_names = possible_languages.map(&:name)
|
20
|
-
classify(Samples.cache, blob.data, language_names).map do |name, _|
|
22
|
+
classify(Samples.cache, blob.data[0...CLASSIFIER_CONSIDER_BYTES], language_names).map do |name, _|
|
21
23
|
Language[name] # Return the actual Language objects
|
22
24
|
end
|
23
25
|
end
|