github-linguist 7.30.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/grammars/inline.edgeql.json +1 -1
  3. data/grammars/inline.peggy.json +1 -0
  4. data/grammars/markdown.move.codeblock.json +1 -1
  5. data/grammars/mdx.move.codeblock.json +1 -0
  6. data/grammars/source.abl.json +1 -1
  7. data/grammars/source.bicep.json +1 -1
  8. data/grammars/source.cairo.json +1 -1
  9. data/grammars/source.cairo0.json +1 -0
  10. data/grammars/source.cl.json +1 -1
  11. data/grammars/source.clar.json +1 -1
  12. data/grammars/source.clarion.json +1 -1
  13. data/grammars/source.cmd.json +1 -1
  14. data/grammars/source.commonlisp.json +1 -1
  15. data/grammars/source.cs.json +1 -1
  16. data/grammars/source.curlrc.json +1 -1
  17. data/grammars/source.curry.json +1 -1
  18. data/grammars/source.cylc.json +1 -0
  19. data/grammars/source.dart.json +1 -1
  20. data/grammars/source.dds.dspf.json +1 -1
  21. data/grammars/source.dds.icff.json +1 -1
  22. data/grammars/source.dds.lf.json +1 -1
  23. data/grammars/source.dds.pf.json +1 -1
  24. data/grammars/source.dds.prtf.json +1 -1
  25. data/grammars/source.dune.json +1 -0
  26. data/grammars/source.elvish.json +1 -1
  27. data/grammars/source.fsharp.json +1 -1
  28. data/grammars/source.gdscript.json +1 -1
  29. data/grammars/source.gitconfig.json +1 -1
  30. data/grammars/source.gleam.json +1 -1
  31. data/grammars/source.hgignore.json +1 -1
  32. data/grammars/source.hosts.json +1 -1
  33. data/grammars/source.iCalendar.json +1 -0
  34. data/grammars/source.ice.json +1 -1
  35. data/grammars/source.julia.json +1 -1
  36. data/grammars/source.just.json +1 -1
  37. data/grammars/source.lcb.json +1 -0
  38. data/grammars/source.lilypond.json +1 -1
  39. data/grammars/source.livecodescript.json +1 -0
  40. data/grammars/source.luau.json +1 -1
  41. data/grammars/source.matlab.json +1 -1
  42. data/grammars/source.mcfunction.json +1 -1
  43. data/grammars/source.mdx.json +1 -1
  44. data/grammars/source.mo.json +1 -1
  45. data/grammars/source.move.json +1 -1
  46. data/grammars/source.nanorc.json +1 -1
  47. data/grammars/source.nr.json +1 -0
  48. data/grammars/source.p4.json +1 -1
  49. data/grammars/source.peggy.json +1 -0
  50. data/grammars/source.polar.json +1 -1
  51. data/grammars/source.powerbuilder.json +1 -0
  52. data/grammars/source.qsharp.json +1 -1
  53. data/grammars/source.rpgle.json +1 -1
  54. data/grammars/source.rust.json +1 -1
  55. data/grammars/source.sentinel.json +1 -1
  56. data/grammars/source.sourcepawn.json +1 -1
  57. data/grammars/source.stan.json +1 -1
  58. data/grammars/source.swift.json +1 -1
  59. data/grammars/source.sy.json +1 -1
  60. data/grammars/source.vba.json +1 -1
  61. data/grammars/source.vcard.json +1 -0
  62. data/grammars/source.wdl.json +1 -1
  63. data/grammars/text.adblock.json +1 -1
  64. data/grammars/text.html.jte.json +1 -0
  65. data/grammars/text.html.statamic.json +1 -1
  66. data/grammars/text.md.json +1 -1
  67. data/grammars/text.mdx.astro.codeblock.json +1 -0
  68. data/grammars/version +1 -1
  69. data/lib/linguist/VERSION +1 -1
  70. data/lib/linguist/classifier.rb +315 -106
  71. data/lib/linguist/generated.rb +17 -4
  72. data/lib/linguist/generic.yml +1 -0
  73. data/lib/linguist/heuristics.rb +6 -6
  74. data/lib/linguist/heuristics.yml +54 -4
  75. data/lib/linguist/languages.json +1 -1
  76. data/lib/linguist/languages.yml +123 -7
  77. data/lib/linguist/samples.json +1 -1
  78. data/lib/linguist/samples.rb +9 -1
  79. data/lib/linguist/sha256.rb +1 -1
  80. metadata +17 -5
  81. data/grammars/markdown.mcfunction.codeblock.json +0 -1
  82. data/grammars/mdx.LANGUAGE.codeblock.json +0 -1
@@ -25,7 +25,14 @@ module Linguist
25
25
  # Hash of serialized samples object, uncached
26
26
  def self.load_samples
27
27
  serializer = defined?(Yajl) ? Yajl : JSON
28
- serializer.load(File.read(PATH, encoding: 'utf-8'))
28
+ data = serializer.load(File.read(PATH, encoding: 'utf-8'))
29
+ # JSON serialization does not allow integer keys, we fix them here
30
+ for lang in data['centroids'].keys
31
+ fixed = data['centroids'][lang].to_a.map { |k,v| [k.to_i, v] }
32
+ data['centroids'][lang] = Hash[fixed]
33
+ end
34
+
35
+ data
29
36
  end
30
37
 
31
38
  # Public: Iterate over each sample.
@@ -106,6 +113,7 @@ module Linguist
106
113
  Classifier.train!(db, language_name, data)
107
114
  end
108
115
 
116
+ Classifier.finalize_train! db
109
117
  db['sha256'] = Linguist::SHA256.hexdigest(db)
110
118
 
111
119
  db
@@ -13,7 +13,7 @@ module Linguist
13
13
  digest = Digest::SHA256.new
14
14
 
15
15
  case obj
16
- when String, Symbol, Integer
16
+ when String, Symbol, Integer, Float
17
17
  digest.update "#{obj.class}"
18
18
  digest.update "#{obj}"
19
19
  when TrueClass, FalseClass, NilClass
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github-linguist
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.30.0
4
+ version: 8.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitHub
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-07 00:00:00.000000000 Z
11
+ date: 2024-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cgi
@@ -248,6 +248,7 @@ files:
248
248
  - grammars/inline.graphql.res.json
249
249
  - grammars/inline.graphql.scala.json
250
250
  - grammars/inline.hbs.json
251
+ - grammars/inline.peggy.json
251
252
  - grammars/inline.prisma.json
252
253
  - grammars/inline.template.json
253
254
  - grammars/liquid.injection.json
@@ -264,7 +265,6 @@ files:
264
265
  - grammars/markdown.hxml.codeblock.json
265
266
  - grammars/markdown.lean.codeblock.json
266
267
  - grammars/markdown.lean4.codeblock.json
267
- - grammars/markdown.mcfunction.codeblock.json
268
268
  - grammars/markdown.move.codeblock.json
269
269
  - grammars/markdown.plantuml.codeblock.json
270
270
  - grammars/markdown.prisma.codeblock.json
@@ -272,7 +272,7 @@ files:
272
272
  - grammars/markdown.rescript.codeblock.json
273
273
  - grammars/markdown.talon.codeblock.json
274
274
  - grammars/markdown.textproto.codeblock.json
275
- - grammars/mdx.LANGUAGE.codeblock.json
275
+ - grammars/mdx.move.codeblock.json
276
276
  - grammars/objdump.x86asm.json
277
277
  - grammars/source.2da.json
278
278
  - grammars/source.4dm.json
@@ -348,6 +348,7 @@ files:
348
348
  - grammars/source.cache.cmake.json
349
349
  - grammars/source.cadence.json
350
350
  - grammars/source.cairo.json
351
+ - grammars/source.cairo0.json
351
352
  - grammars/source.camlp4.ocaml.json
352
353
  - grammars/source.capnp.json
353
354
  - grammars/source.cds.json
@@ -398,6 +399,7 @@ files:
398
399
  - grammars/source.curlrc.json
399
400
  - grammars/source.curry.json
400
401
  - grammars/source.cwl.json
402
+ - grammars/source.cylc.json
401
403
  - grammars/source.cypher.json
402
404
  - grammars/source.cython.json
403
405
  - grammars/source.d.json
@@ -428,6 +430,7 @@ files:
428
430
  - grammars/source.dosbox-conf.json
429
431
  - grammars/source.dot.json
430
432
  - grammars/source.dotenv.json
433
+ - grammars/source.dune.json
431
434
  - grammars/source.dylan.json
432
435
  - grammars/source.earthfile.json
433
436
  - grammars/source.ebnf.json
@@ -529,6 +532,7 @@ files:
529
532
  - grammars/source.hx.type.json
530
533
  - grammars/source.hxml.json
531
534
  - grammars/source.hy.json
535
+ - grammars/source.iCalendar.json
532
536
  - grammars/source.ice.json
533
537
  - grammars/source.icurry.json
534
538
  - grammars/source.ideal.json
@@ -580,6 +584,7 @@ files:
580
584
  - grammars/source.kusto.json
581
585
  - grammars/source.lark.json
582
586
  - grammars/source.lbnf.json
587
+ - grammars/source.lcb.json
583
588
  - grammars/source.lcov.json
584
589
  - grammars/source.lean.json
585
590
  - grammars/source.lean.markdown.json
@@ -592,6 +597,7 @@ files:
592
597
  - grammars/source.lilypond.json
593
598
  - grammars/source.lisp.json
594
599
  - grammars/source.litcoffee.json
600
+ - grammars/source.livecodescript.json
595
601
  - grammars/source.livescript.json
596
602
  - grammars/source.llvm.json
597
603
  - grammars/source.logos.json
@@ -670,6 +676,7 @@ files:
670
676
  - grammars/source.ninja.json
671
677
  - grammars/source.nit.json
672
678
  - grammars/source.nix.json
679
+ - grammars/source.nr.json
673
680
  - grammars/source.nsis.json
674
681
  - grammars/source.nu.json
675
682
  - grammars/source.nunjucks.json
@@ -711,6 +718,7 @@ files:
711
718
  - grammars/source.pddl.happenings.json
712
719
  - grammars/source.pddl.json
713
720
  - grammars/source.pddl.plan.json
721
+ - grammars/source.peggy.json
714
722
  - grammars/source.pegjs.json
715
723
  - grammars/source.pep8.json
716
724
  - grammars/source.perl.6.json
@@ -732,6 +740,7 @@ files:
732
740
  - grammars/source.postcss.json
733
741
  - grammars/source.postscript.json
734
742
  - grammars/source.pov-ray sdl.json
743
+ - grammars/source.powerbuilder.json
735
744
  - grammars/source.powershell.json
736
745
  - grammars/source.praat.json
737
746
  - grammars/source.prisma.json
@@ -880,6 +889,7 @@ files:
880
889
  - grammars/source.varnish.vcl.json
881
890
  - grammars/source.vba.json
882
891
  - grammars/source.vbnet.json
892
+ - grammars/source.vcard.json
883
893
  - grammars/source.velocity.html.json
884
894
  - grammars/source.velocity.json
885
895
  - grammars/source.verilog.json
@@ -963,6 +973,7 @@ files:
963
973
  - grammars/text.html.javadoc.json
964
974
  - grammars/text.html.js.json
965
975
  - grammars/text.html.jsp.json
976
+ - grammars/text.html.jte.json
966
977
  - grammars/text.html.liquid.json
967
978
  - grammars/text.html.mako.json
968
979
  - grammars/text.html.markdown.astro.json
@@ -992,6 +1003,7 @@ files:
992
1003
  - grammars/text.log.latex.json
993
1004
  - grammars/text.marko.json
994
1005
  - grammars/text.md.json
1006
+ - grammars/text.mdx.astro.codeblock.json
995
1007
  - grammars/text.muse.json
996
1008
  - grammars/text.openbsd-pkg.desc.json
997
1009
  - grammars/text.plain.json
@@ -1086,7 +1098,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
1086
1098
  - !ruby/object:Gem::Version
1087
1099
  version: '0'
1088
1100
  requirements: []
1089
- rubygems_version: 3.5.9
1101
+ rubygems_version: 3.5.11
1090
1102
  signing_key:
1091
1103
  specification_version: 4
1092
1104
  summary: GitHub Language detection
@@ -1 +0,0 @@
1
- {"scopeName":"markdown.mcfunction.codeblock","patterns":[{"include":"#codeblock.outer"}],"repository":{"codeblock.inner":{"contentName":"meta.embedded.block.mcfunction","begin":"(^|\\G)","while":"(^|\\G)(?!\\s*([`~]{3,})\\s*$)","patterns":[{"include":"source.mcfunction"}]},"codeblock.outer":{"name":"markup.fenced_code.block.markdown","begin":"(^|\\G)(\\s*)(`{3,}|~{3,})\\s*(?i:(mcfunction)((\\s+|:|\\{|\\?)[^`~]*)?$)","end":"(^|\\G)(\\2|\\s{0,3})(\\3)\\s*$","patterns":[{"include":"#codeblock.inner"}],"beginCaptures":{"3":{"name":"punctuation.definition.markdown"},"4":{"name":"fenced_code.block.language.markdown"},"5":{"name":"fenced_code.block.language.attributes.markdown"}},"endCaptures":{"3":{"name":"punctuation.definition.markdown"}}}}}
@@ -1 +0,0 @@
1
- {"scopeName":"mdx.LANGUAGE.codeblock","patterns":[{"include":"#LANGUAGE-code-block"}],"repository":{"LANGUAGE-code-block":{"name":"markup.code.other.mdx","end":"(^|\\G)(\\2|\\s{0,3})(\\3)\\s*$","patterns":[{"contentName":"meta.embedded.block.LANGUAGE","begin":"(^|\\G)(\\s*)(.*)","while":"(^|\\G)(?!\\s*([`~]{3,})\\s*$)","patterns":[{}]}],"beginCaptures":{"3":{"name":"string.other.begin.code.fenced.mdx"},"4":{"name":"entity.name.function.mdx"}},"endCaptures":{"3":{"name":"string.other.end.code.fenced.mdx"}}}}}