github-linguist 5.3.1 → 5.3.2
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/ext/linguist/extconf.rb +3 -0
- data/ext/linguist/lex.linguist_yy.c +8269 -0
- data/ext/linguist/lex.linguist_yy.h +353 -0
- data/ext/linguist/linguist.c +64 -0
- data/ext/linguist/linguist.h +11 -0
- data/ext/linguist/tokenizer.l +119 -0
- data/grammars/source.coffee.json +123 -41
- data/grammars/source.crystal.json +2 -2
- data/grammars/source.css.less.json +319 -27
- data/grammars/source.glsl.json +1 -1
- data/grammars/source.js.json +6 -2
- data/grammars/source.meson.json +1 -1
- data/grammars/source.tsx.json +4 -14
- data/grammars/source.wdl.json +2 -2
- data/grammars/text.roff.json +155 -41
- data/grammars/text.shell-session.json +1 -1
- data/lib/linguist/blob_helper.rb +47 -4
- data/lib/linguist/classifier.rb +3 -1
- data/lib/linguist/file_blob.rb +3 -3
- data/lib/linguist/heuristics.rb +15 -6
- data/lib/linguist/linguist.bundle +0 -0
- data/lib/linguist/samples.json +49989 -44225
- data/lib/linguist/strategy/modeline.rb +2 -2
- data/lib/linguist/tokenizer.rb +1 -186
- data/lib/linguist/version.rb +1 -1
- metadata +25 -3
data/grammars/source.glsl.json
CHANGED
@@ -81,7 +81,7 @@
   "name": "storage.modifier.glsl"
   },
   {
-  "match": "\\b(gl_BackColor|gl_BackLightModelProduct|gl_BackLightProduct|gl_BackMaterial|gl_BackSecondaryColor|gl_ClipDistance|gl_ClipPlane|gl_ClipVertex|gl_Color|gl_DepthRange|gl_DepthRangeParameters|gl_EyePlaneQ|gl_EyePlaneR|gl_EyePlaneS|gl_EyePlaneT|gl_Fog|gl_FogCoord|gl_FogFragCoord|gl_FogParameters|gl_FragColor|gl_FragCoord|
+  "match": "\\b(gl_BackColor|gl_BackLightModelProduct|gl_BackLightProduct|gl_BackMaterial|gl_BackSecondaryColor|gl_ClipDistance|gl_ClipPlane|gl_ClipVertex|gl_Color|gl_DepthRange|gl_DepthRangeParameters|gl_EyePlaneQ|gl_EyePlaneR|gl_EyePlaneS|gl_EyePlaneT|gl_Fog|gl_FogCoord|gl_FogFragCoord|gl_FogParameters|gl_FragColor|gl_FragCoord|gl_FragData|gl_FragDepth|gl_FrontColor|gl_FrontFacing|gl_FrontLightModelProduct|gl_FrontLightProduct|gl_FrontMaterial|gl_FrontSecondaryColor|gl_InstanceID|gl_Layer|gl_LightModel|gl_LightModelParameters|gl_LightModelProducts|gl_LightProducts|gl_LightSource|gl_LightSourceParameters|gl_MaterialParameters|gl_ModelViewMatrix|gl_ModelViewMatrixInverse|gl_ModelViewMatrixInverseTranspose|gl_ModelViewMatrixTranspose|gl_ModelViewProjectionMatrix|gl_ModelViewProjectionMatrixInverse|gl_ModelViewProjectionMatrixInverseTranspose|gl_ModelViewProjectionMatrixTranspose|gl_MultiTexCoord[0-7]|gl_Normal|gl_NormalMatrix|gl_NormalScale|gl_ObjectPlaneQ|gl_ObjectPlaneR|gl_ObjectPlaneS|gl_ObjectPlaneT|gl_Point|gl_PointCoord|gl_PointParameters|gl_PointSize|gl_Position|gl_PrimitiveIDIn|gl_ProjectionMatrix|gl_ProjectionMatrixInverse|gl_ProjectionMatrixInverseTranspose|gl_ProjectionMatrixTranspose|gl_SecondaryColor|gl_TexCoord|gl_TextureEnvColor|gl_TextureMatrix|gl_TextureMatrixInverse|gl_TextureMatrixInverseTranspose|gl_TextureMatrixTranspose|gl_Vertex|gl_VertexID)\\b",
   "name": "support.variable.glsl"
   },
   {
data/grammars/source.js.json
CHANGED
@@ -467,7 +467,7 @@
   ]
   },
   {
-  "begin": "(?x)\n(?=\n (?!\n (break|case|catch|continue|do|else|finally|for|function|if|\n
+  "begin": "(?x)\n(?=\n (?!\n (break|case|catch|continue|do|else|finally|for|function|if|\n return|switch|throw|try|while|with)\n [\\s\\(]\n )\n (\n \\b(get|set) # Property getter/setter: get foo(){}\n (?:\\s+|(?=\\[)) # Followed by whitespace or square bracket\n )?+\n ( # Method name\n \\b[a-zA-Z_$][\\w$]* # Fixed name\n |\n \\[ # Computed property key\n [^\\[\\]]++ # Contains at least one non-brace character\n \\]\n )\n \\s*\\(\\s* # Start of arguments list\n (\n \"[^\"]*\" | # Double-quoted string\n '[^']*' | # Single-quoted string\n [^\"()'] # Any non-bracket or non-quote\n )*\n \\)\\s* # End of arguments\n { # Beginning of body\n)",
   "end": "(?<=})",
   "patterns": [
   {
@@ -899,7 +899,11 @@
   "name": "meta.control.yield.js"
   },
   {
-  "match": "(?<!\\.)\\b(await
+  "match": "(?:(?<=\\.{3})|(?<!\\.))\\b(await)(?!\\s*:)\\b",
+  "name": "keyword.control.js"
+  },
+  {
+  "match": "(?<!\\.)\\b(break|catch|continue|do|else|finally|for|if|import|package|return|throw|try|while|with)(?!\\s*:)\\b",
   "name": "keyword.control.js"
   },
   {
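The second hunk splits await out of the shared keyword rule: the new pattern also matches await immediately after a spread (...await expr) while still ignoring member access (obj.await) and label-like uses (await:). As a rough, informal check of the new pattern outside the grammar, run through Ruby's Oniguruma-family regex engine (the test strings below are invented for illustration):

    # Hedged sketch, not part of the gem: the match pattern copied from the
    # updated rule, exercised against a few hand-picked strings.
    await_re = /(?:(?<=\.{3})|(?<!\.))\b(await)(?!\s*:)\b/
    ["await foo()", "...await bar", "obj.await", "await:"].each do |src|
      puts format("%-14s => %s", src, src.match?(await_re))
    end
    # await foo()    => true
    # ...await bar   => true
    # obj.await      => false
    # await:         => false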
data/grammars/source.meson.json
CHANGED
@@ -60,7 +60,7 @@
   "name": "keyword.operator.arithmetic.meson"
   },
   {
-  "match": "(?x)\\b(add_global_arguments|add_global_link_arguments|
+  "match": "(?x)\\b(add_global_arguments|add_project_arguments|add_global_link_arguments|add_project_link_arguments|add_test_setup|add_languages|assert|benchmark|build_target|configuration_data|configure_file|custom_target|declare_dependency|dependency|environment|error|executable|generator|gettext|get_option|get_variable|files|find_library|find_program|include_directories|import|install_data|install_headers|install_man|install_subdir|is_variable|jar|join_paths|library|message|option|project|run_target|run_command|set_variable|subdir|subproject|shared_library|shared_module|static_library|test|vcs_tag\n)\\b\\s*(?=\\()",
   "name": "support.function.builtin.meson"
   }
   ],
data/grammars/source.tsx.json
CHANGED
@@ -27,15 +27,12 @@
   },
   {
   "include": "#jsx-tag-in-expression"
-  },
-  {
-  "include": "#jsx-tag-invalid"
   }
   ]
   },
   "jsx-tag-without-attributes-in-expression": {
-  "begin": "(?x)\n (?<=[({\\[,?=>:*]|&&|\\|\\||\\?|\\Wreturn|^return|\\Wdefault|^)\\s*\n (?=(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
-  "end": "(?!\\s*(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
+  "begin": "(?x)\n (?<=[({\\[,?=>:*]|&&|\\|\\||\\?|\\Wreturn|^return|\\Wdefault|^)\\s*\n (?=(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>))",
+  "end": "(?!\\s*(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>))",
   "patterns": [
   {
   "include": "#jsx-tag-without-attributes"
@@ -44,8 +41,8 @@
   },
   "jsx-tag-without-attributes": {
   "name": "meta.tag.without-attributes.tsx",
-  "begin": "(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
-  "end": "(</)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))
+  "begin": "(<)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>)",
+  "end": "(</)\\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\\w.]*))(?<!\\.|-))?\\s*(>)",
   "beginCaptures": {
   "1": {
   "name": "punctuation.definition.tag.begin.tsx"
@@ -188,10 +185,6 @@
   }
   ]
   },
-  "jsx-tag-invalid": {
-  "name": "invalid.illegal.tag.incomplete.tsx",
-  "match": "<\\s*>"
-  },
   "jsx-children": {
   "patterns": [
   {
@@ -200,9 +193,6 @@
   {
   "include": "#jsx-child-tag"
   },
-  {
-  "include": "#jsx-tag-invalid"
-  },
   {
   "include": "#jsx-evaluated-code"
   },
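These hunks drop the jsx-tag-invalid rule and instead make the element name optional in the tag patterns, so empty JSX/TSX fragments (<> ... </>) are scoped as ordinary tags rather than flagged as invalid.illegal.tag.incomplete.tsx. A quick, informal check of the new begin pattern, run with Ruby's Oniguruma-compatible engine (the sample strings are made up for illustration):

    # Hedged sketch, not part of the gem: the tag-name group is now optional,
    # so a bare "<>" opener matches the same rule as "<div>".
    tag_open = /(<)\s*((?:[a-z][a-z0-9]*|([_$a-zA-Z][-$\w.]*))(?<!\.|-))?\s*(>)/
    ["<>", "<div>", "<Component>"].each do |src|
      puts format("%-12s => %s", src, tag_open.match?(src))
    end
    # all three print true; "<>" previously fell through to jsx-tag-invalid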
data/grammars/source.wdl.json
CHANGED
@@ -48,7 +48,7 @@
   "repository": {
   "builtin_types": {
   "name": "support.type.wdl",
-  "match": "(?<!\\.)\\b(Array|Boolean|File|Float|Int|Map|Object|String|
+  "match": "(?<!\\.)\\b(Array|Boolean|File|Float|Int|Map|Object|String|Pair)\\b"
   },
   "comments": {
   "patterns": [
@@ -129,7 +129,7 @@
   "patterns": [
   {
   "name": "keyword.other.wdl",
-  "match": "\\s*(call|command|output|runtime|task|workflow|if|then|else|import|as|input|output|meta|parameter_meta)\\s+"
+  "match": "\\s*(call|command|output|runtime|task|workflow|if|then|else|import|as|input|output|meta|parameter_meta|scatter)\\s+"
   }
   ]
   },
data/grammars/text.roff.json
CHANGED
@@ -492,6 +492,9 @@
   },
   "contentName": "string.unquoted.roff",
   "patterns": [
+  {
+  "include": "#escapes-clipped"
+  },
   {
   "include": "#escapes"
   }
@@ -751,7 +754,7 @@
   "definition": {
   "patterns": [
   {
-  "name": "meta.
+  "name": "meta.macro.definition.$2.roff",
   "begin": "^([.'])[ \t]*((dei?1?)|(ami?1?))\\s+(\\S+?)?\\s*(\\\\[\"#].*)?$",
   "end": "^(?:[ \t]*\\x5C{2})?\\.[ \t]*\\.",
   "beginCaptures": {
@@ -794,7 +797,7 @@
   ]
   },
   {
-  "name": "meta.
+  "name": "meta.macro.definition.with-terminator.$2.roff",
   "begin": "^([.'])[ \t]*((dei?1?)|(ami?1?))\\s+(\\S+)\\s*(\"[^\"]+\"?|\\S+?(?=\\s|\\\\[\"#]))?(.*)$",
   "end": "^(\\.)[ \t]*((\\6)(?=$|\\s|\\\\(?:$|\")))",
   "beginCaptures": {
@@ -1139,14 +1142,7 @@
   }
   },
   {
-  "
-  "begin": "(\\\\)$\\n?",
-  "end": "^(?:[.'])?",
-  "beginCaptures": {
-  "1": {
-  "name": "punctuation.definition.escape.roff"
-  }
-  }
+  "include": "#continuous-newline"
   },
   {
   "include": "#register-expansion"
@@ -1578,6 +1574,121 @@
   }
   ]
   },
+  "continuous-newline": {
+  "begin": "(\\\\)?(\\\\)$\\n?",
+  "end": "^(?:[.'])?",
+  "beginCaptures": {
+  "0": {
+  "name": "constant.character.escape.newline.roff"
+  },
+  "1": {
+  "name": "punctuation.definition.concealed.escape.backslash.roff"
+  },
+  "2": {
+  "name": "punctuation.definition.escape.roff"
+  }
+  }
+  },
+  "escapes-clipped": {
+  "patterns": [
+  {
+  "begin": "\\\\f(?:[I2]|\\(CI|\\[\\s*(?:[I2]|CI)\\s*\\])",
+  "end": "$|(?=\\\\f[\\[A-Za-z0-9])",
+  "beginCaptures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "#escapes"
+  }
+  ]
+  }
+  },
+  "patterns": [
+  {
+  "include": "#escaped-newline"
+  },
+  {
+  "include": "$self"
+  },
+  {
+  "include": "#italic-word"
+  }
+  ]
+  },
+  {
+  "begin": "\\\\f(?:[B3]|\\(CB|\\[\\s*(?:[B3]|CB)\\s*\\])",
+  "end": "$|(?=\\\\f[\\[A-Za-z0-9])",
+  "beginCaptures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "#escapes"
+  }
+  ]
+  }
+  },
+  "patterns": [
+  {
+  "include": "#escaped-newline"
+  },
+  {
+  "include": "$self"
+  },
+  {
+  "include": "#bold-word"
+  }
+  ]
+  },
+  {
+  "begin": "\\\\f(?:4|\\(BI|\\[\\s*BI\\s*\\])",
+  "end": "$|(?=\\\\f[\\[A-Za-z0-9])",
+  "beginCaptures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "#escapes"
+  }
+  ]
+  }
+  },
+  "patterns": [
+  {
+  "include": "#escaped-newline"
+  },
+  {
+  "include": "$self"
+  },
+  {
+  "include": "#bold-italic-word"
+  }
+  ]
+  },
+  {
+  "begin": "\\\\f(?:\\(C[WR]|\\[\\s*C[WR]\\s*\\])",
+  "end": "$|(?=\\\\f[\\[A-Za-z0-9])",
+  "beginCaptures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "#escapes"
+  }
+  ]
+  }
+  },
+  "patterns": [
+  {
+  "include": "#escaped-newline"
+  },
+  {
+  "include": "$self"
+  },
+  {
+  "include": "#monospace-word"
+  }
+  ]
+  }
+  ]
+  },
   "escapes-full": {
   "patterns": [
   {
@@ -2406,6 +2517,34 @@
   }
   ]
   },
+  {
+  "name": "meta.shell-snippet.mdoc.roff",
+  "begin": "^(?:\\S*.*?\\s+)?(?i:Bash|(?:Bourne[\\s-]?)?Shell(?:[\\s-]?Script)?):\\s*$\\n?",
+  "end": "^(?!\\t|\\s*$)",
+  "beginCaptures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "#main"
+  }
+  ]
+  }
+  },
+  "patterns": [
+  {
+  "match": ".+",
+  "captures": {
+  "0": {
+  "patterns": [
+  {
+  "include": "source.shell"
+  }
+  ]
+  }
+  }
+  }
+  ]
+  },
   {
   "include": "#main"
   }
@@ -3349,7 +3488,7 @@
   ]
   },
   {
-  "begin": "^([.'])[ \t]*(TP)(?=\\s|\\\\[\"#])(.*)?$\\n?",
+  "begin": "^([.'])[ \t]*(TP|TQ)(?=\\s|\\\\[\"#])(.*)?$\\n?",
   "end": "^(.*)(?<!\\\\)$",
   "patterns": [
   {
@@ -3384,42 +3523,17 @@
   }
   },
   "endCaptures": {
-  "1": {
-  "patterns": [
-  {
-  "include": "$self"
-  }
-  ]
-  }
-  }
-  },
-  {
-  "name": "markup.list.unnumbered.man.macro.gnu.roff",
-  "begin": "^([.'])[ \t]*(TQ)[ \t]*(\\\\[#\"].*)?$",
-  "end": "^(?=[.'][ \t]*TP(?:\\s|\\\\[#\"]))",
-  "beginCaptures": {
   "0": {
-  "name": "
+  "name": "markup.heading.paragraph.roff"
   },
   "1": {
-  "name": "punctuation.definition.macro.roff"
-  },
-  "2": {
-  "name": "entity.function.name.gnu.roff"
-  },
-  "3": {
   "patterns": [
   {
-  "include": "
+  "include": "$self"
   }
   ]
   }
-  }
-  "patterns": [
-  {
-  "include": "$self"
-  }
-  ]
+  }
   },
   {
   "name": "meta.deprecated.function.hanging-paragraph.man.macro.roff",
@@ -4827,8 +4941,8 @@
   }
   },
   {
-  "begin": "^([.'])[ \t]*(\\[)(
-  "end": "^([.'])[ \t]*(\\])(
+  "begin": "^([.'])[ \t]*(\\[)\\s*([-$'\\w.\\\\]*?)\\s*(\\\\[\"#].*)?$",
+  "end": "^([.'])[ \t]*(\\])\\s*([-$'\\w.\\\\]*?)(?=\\s|$|\\\\\")",
   "contentName": "meta.citation.roff",
   "patterns": [
   {
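Among the roff additions is a meta.shell-snippet.mdoc.roff rule that hands blocks introduced by a "Bash:" or "Shell script:" style label off to source.shell for highlighting. A small, informal probe of the rule's begin pattern, using Ruby's regex engine (the example lines are invented for illustration):

    # Hedged sketch, not part of the gem: lines ending with a Bash/Shell label
    # start the region; the indented lines that follow get shell highlighting.
    snippet_start = /^(?:\S*.*?\s+)?(?i:Bash|(?:Bourne[\s-]?)?Shell(?:[\s-]?Script)?):\s*$\n?/
    ["Bash:", "Example shell script:", "ls -la"].each do |line|
      puts format("%-24s => %s", line, snippet_start.match?(line))
    end
    # Bash:                    => true
    # Example shell script:    => true
    # ls -la                   => false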
data/grammars/text.shell-session.json
CHANGED
@@ -6,7 +6,7 @@
   ],
   "patterns": [
   {
-  "match": "(?x) ^ (?: ( (?:\\(\\S+\\))? (?: sh\\S*? | \\w+\\S+[@:]\\S+(?:\\s+\\S+)? | \\[\\S+[@:][^\\n]+\\].+ ) ) \\s* )? ( [>$#%] ) \\s+ (.*) $",
+  "match": "(?x) ^ (?: ( (?:\\(\\S+\\))? (?: sh\\S*? | \\w+\\S+[@:]\\S+(?:\\s+\\S+)? | \\[\\S+[@:][^\\n]+\\].+ ) ) \\s* )? ( [>$#%] | \\p{Greek} ) \\s+ (.*) $",
   "captures": {
   "1": {
   "name": "entity.other.prompt-prefix.shell-session"
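The shell-session prompt matcher now also accepts a Greek letter as the prompt character, so transcripts that use a lambda-style prompt are still recognised. A minimal check of the updated pattern with Ruby's engine (the sample transcript lines are invented):

    # Hedged sketch, not part of the gem: the prompt character may now be
    # >, $, #, % or any Greek letter such as a lambda prompt.
    prompt = /(?x) ^ (?: ( (?:\(\S+\))? (?: sh\S*? | \w+\S+[@:]\S+(?:\s+\S+)? | \[\S+[@:][^\n]+\].+ ) ) \s* )? ( [>$#%] | \p{Greek} ) \s+ (.*) $/
    puts prompt.match?("$ make install")   # => true
    puts prompt.match?("λ stack build")    # => true (newly matched)
    puts prompt.match?("no prompt here")   # => false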
data/lib/linguist/blob_helper.rb
CHANGED
@@ -275,10 +275,8 @@ module Linguist
       # also--importantly--without having to duplicate many (potentially
       # large) strings.
       begin
-
-
-
-        data.split(Regexp.union(encoded_newlines), -1)
+
+        data.split(encoded_newlines_re, -1)
       rescue Encoding::ConverterNotFoundError
         # The data is not splittable in the detected encoding. Assume it's
         # one big line.
@@ -289,6 +287,51 @@ module Linguist
       end
     end

+    def encoded_newlines_re
+      @encoded_newlines_re ||= Regexp.union(["\r\n", "\r", "\n"].
+        map { |nl| nl.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding) })
+
+    end
+
+    def first_lines(n)
+      return lines[0...n] if defined? @lines
+      return [] unless viewable? && data
+
+      i, c = 0, 0
+      while c < n && j = data.index(encoded_newlines_re, i)
+        i = j + $&.length
+        c += 1
+      end
+      data[0...i].split(encoded_newlines_re, -1)
+    end
+
+    def last_lines(n)
+      if defined? @lines
+        if n >= @lines.length
+          @lines
+        else
+          lines[-n..-1]
+        end
+      end
+      return [] unless viewable? && data
+
+      no_eol = true
+      i, c = data.length, 0
+      k = i
+      while c < n && j = data.rindex(encoded_newlines_re, i - 1)
+        if c == 0 && j + $&.length == i
+          no_eol = false
+          n += 1
+        end
+        i = j
+        k = j + $&.length
+        c += 1
+      end
+      r = data[k..-1].split(encoded_newlines_re, -1)
+      r.pop if !no_eol
+      r
+    end
+
     # Public: Get number of lines of code
     #
     # Requires Blob#data
data/lib/linguist/classifier.rb
CHANGED
@@ -3,6 +3,8 @@ require 'linguist/tokenizer'
 module Linguist
   # Language bayesian classifier.
   class Classifier
+    CLASSIFIER_CONSIDER_BYTES = 50 * 1024
+
     # Public: Use the classifier to detect language of the blob.
     #
     # blob - An object that quacks like a blob.
@@ -17,7 +19,7 @@ module Linguist
     # Returns an Array of Language objects, most probable first.
     def self.call(blob, possible_languages)
       language_names = possible_languages.map(&:name)
-      classify(Samples.cache, blob.data, language_names).map do |name, _|
+      classify(Samples.cache, blob.data[0...CLASSIFIER_CONSIDER_BYTES], language_names).map do |name, _|
         Language[name] # Return the actual Language objects
       end
     end