RubyGems - github-linguist - Versions diffs - 5.3.1 → 5.3.2 - Mend

github-linguist 5.3.1 → 5.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/ext/linguist/extconf.rb +3 -0
data/ext/linguist/lex.linguist_yy.c +8269 -0
data/ext/linguist/lex.linguist_yy.h +353 -0
data/ext/linguist/linguist.c +64 -0
data/ext/linguist/linguist.h +11 -0
data/ext/linguist/tokenizer.l +119 -0
data/grammars/source.coffee.json +123 -41
data/grammars/source.crystal.json +2 -2
data/grammars/source.css.less.json +319 -27
data/grammars/source.glsl.json +1 -1
data/grammars/source.js.json +6 -2
data/grammars/source.meson.json +1 -1
data/grammars/source.tsx.json +4 -14
data/grammars/source.wdl.json +2 -2
data/grammars/text.roff.json +155 -41
data/grammars/text.shell-session.json +1 -1
data/lib/linguist/blob_helper.rb +47 -4
data/lib/linguist/classifier.rb +3 -1
data/lib/linguist/file_blob.rb +3 -3
data/lib/linguist/heuristics.rb +15 -6
data/lib/linguist/linguist.bundle +0 -0
data/lib/linguist/samples.json +49989 -44225
data/lib/linguist/strategy/modeline.rb +2 -2
data/lib/linguist/tokenizer.rb +1 -186
data/lib/linguist/version.rb +1 -1
metadata +25 -3

data/ext/linguist/lex.linguist_yy.h ADDED

@@ -0,0 +1,353 @@
+#ifndef linguist_yyHEADER_H
+#define linguist_yyHEADER_H 1
+#define linguist_yyIN_HEADER 1
+#line 6 "lex.linguist_yy.h"
+#define  YY_INT_ALIGNED short int
+/* A lexical scanner generated by flex */
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 39
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+/* First, we deal with  platform-specific or compiler-specific issues. */
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+/* end standard C headers. */
+/* flex integer type definitions */
+#ifndef FLEXINT_H
+#define FLEXINT_H
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN               (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN              (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX               (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX              (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX              (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX              (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX             (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
+#endif /* ! C99 */
+#endif /* ! FLEXINT_H */
+#ifdef __cplusplus
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+#else	/* ! __cplusplus */
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+#define YY_USE_CONST
+#endif	/* defined (__STDC__) */
+#endif	/* ! __cplusplus */
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+/* For convenience, these vars (plus the bison vars far below)
+   are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state	/* per-buffer bookkeeping record of the flex-generated scanner; this header is generated output, so change tokenizer.l and regenerate rather than editing here */
+	{
+	FILE *yy_input_file;	/* stdio stream tied to this buffer; presumably the input source - usage is not visible in this header */
+	char *yy_ch_buf;		/* input buffer */
+	char *yy_buf_pos;		/* current position in input buffer */
+	/* Size of input buffer in bytes, not including room for EOB
+	 * characters.
+	 */
+	yy_size_t yy_buf_size;
+	/* Number of characters read into yy_ch_buf, not including EOB
+	 * characters.
+	 */
+	yy_size_t yy_n_chars;
+	/* Whether we "own" the buffer - i.e., we know we created it,
+	 * and can realloc() it to grow it, and should free() it to
+	 * delete it.
+	 */
+	int yy_is_our_buffer;
+	/* Whether this is an "interactive" input source; if so, and
+	 * if we're using stdio for input, then we want to use getc()
+	 * instead of fread(), to make sure we stop fetching input after
+	 * each newline.
+	 */
+	int yy_is_interactive;
+	/* Whether we're considered to be at the beginning of a line.
+	 * If so, '^' rules will be active on the next match, otherwise
+	 * not.
+	 */
+	int yy_at_bol;
+    int yy_bs_lineno; /**< The line count. Read through the yylineno macro defined earlier in this header. */
+    int yy_bs_column; /**< The column count. Read through the yycolumn macro defined earlier in this header. */
+	/* Whether to try to fill the input buffer when we reach the
+	 * end of it.
+	 */
+	int yy_fill_buffer;
+	int yy_buffer_status;	/* status code of the buffer; its possible values are not defined in this header */
+	};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+void linguist_yyrestart (FILE *input_file ,yyscan_t yyscanner );
+void linguist_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE linguist_yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void linguist_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void linguist_yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void linguist_yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void linguist_yypop_buffer_state (yyscan_t yyscanner );
+YY_BUFFER_STATE linguist_yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE linguist_yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE linguist_yy_scan_bytes (yyconst char *bytes,yy_size_t len ,yyscan_t yyscanner );
+void *linguist_yyalloc (yy_size_t ,yyscan_t yyscanner );
+void *linguist_yyrealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void linguist_yyfree (void * ,yyscan_t yyscanner );
+/* Begin user sect3 */
+#define yytext_ptr yytext_r
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+#define sgml 1
+#define c_comment 2
+#define xml_comment 3
+#define haskell_comment 4
+#define ocaml_comment 5
+#define python_dcomment 6
+#define python_scomment 7
+#endif
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+#define YY_EXTRA_TYPE struct tokenizer_extra *
+int linguist_yylex_init (yyscan_t* scanner);
+int linguist_yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+int linguist_yylex_destroy (yyscan_t yyscanner );
+int linguist_yyget_debug (yyscan_t yyscanner );
+void linguist_yyset_debug (int debug_flag ,yyscan_t yyscanner );
+YY_EXTRA_TYPE linguist_yyget_extra (yyscan_t yyscanner );
+void linguist_yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+FILE *linguist_yyget_in (yyscan_t yyscanner );
+void linguist_yyset_in  (FILE * in_str ,yyscan_t yyscanner );
+FILE *linguist_yyget_out (yyscan_t yyscanner );
+void linguist_yyset_out  (FILE * out_str ,yyscan_t yyscanner );
+yy_size_t linguist_yyget_leng (yyscan_t yyscanner );
+char *linguist_yyget_text (yyscan_t yyscanner );
+int linguist_yyget_lineno (yyscan_t yyscanner );
+void linguist_yyset_lineno (int line_number ,yyscan_t yyscanner );
+int linguist_yyget_column  (yyscan_t yyscanner );
+void linguist_yyset_column (int column_no ,yyscan_t yyscanner );
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int linguist_yywrap (yyscan_t yyscanner );
+#else
+extern int linguist_yywrap (yyscan_t yyscanner );
+#endif
+#endif
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+#ifndef YY_NO_INPUT
+#endif
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+extern int linguist_yylex (yyscan_t yyscanner);
+#define YY_DECL int linguist_yylex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+#line 117 "tokenizer.l"
+#line 352 "lex.linguist_yy.h"
+#undef linguist_yyIN_HEADER
+#endif /* linguist_yyHEADER_H */

data/ext/linguist/linguist.c ADDED

@@ -0,0 +1,64 @@
+#include "ruby.h"
+#include "linguist.h"
+#include "lex.linguist_yy.h"
+/*
+ * End-of-input hook required by the scanner (tokenizer.l enables the
+ * `yywrap` option). It always answers 1; per the flex manual a non-zero
+ * result means there is no further input to switch to.
+ */
+int linguist_yywrap(yyscan_t yyscanner) {
+	(void) yyscanner;	/* the scanner handle is not needed to answer */
+	return 1;
+}
+static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
+	YY_BUFFER_STATE buf;
+	yyscan_t scanner;
+	struct tokenizer_extra extra;
+	VALUE ary, s;
+	long len;
+	int r;
+	Check_Type(rb_data, T_STRING);
+	len = RSTRING_LEN(rb_data);
+	if (len > 100000)
+		len = 100000;
+	linguist_yylex_init_extra(&extra, &scanner);
+	buf = linguist_yy_scan_bytes(RSTRING_PTR(rb_data), (int) len, scanner);
+	ary = rb_ary_new();
+	do {
+		extra.type = NO_ACTION;
+		extra.token = NULL;
+		r = linguist_yylex(scanner);
+		switch (extra.type) {
+		case NO_ACTION:
+			break;
+		case REGULAR_TOKEN:
+			rb_ary_push(ary, rb_str_new2(extra.token));
+			free(extra.token);
+			break;
+		case SHEBANG_TOKEN:
+			s = rb_str_new2("SHEBANG#!");
+			rb_str_cat2(s, extra.token);
+			rb_ary_push(ary, s);
+			free(extra.token);
+			break;
+		case SGML_TOKEN:
+			s = rb_str_new2(extra.token);
+			rb_str_cat2(s, ">");
+			rb_ary_push(ary, s);
+			free(extra.token);
+			break;
+		}
+	} while (r);
+	linguist_yy_delete_buffer(buf, scanner);
+	linguist_yylex_destroy(scanner);
+	return ary;
+}
+/*
+ * Exported with default visibility. Defines module Linguist, class
+ * Linguist::Tokenizer (superclass Object) and the one-argument instance
+ * method Tokenizer#extract_tokens backed by rb_tokenizer_extract_tokens.
+ * By Ruby extension convention an Init_<name> function is the load hook.
+ */
+__attribute__((visibility("default"))) void Init_linguist() {
+	VALUE linguist_module;
+	VALUE tokenizer_class;
+	linguist_module = rb_define_module("Linguist");
+	tokenizer_class = rb_define_class_under(linguist_module, "Tokenizer", rb_cObject);
+	rb_define_method(tokenizer_class, "extract_tokens", rb_tokenizer_extract_tokens, 1);
+}

data/ext/linguist/linguist.h ADDED

@@ -0,0 +1,11 @@
+enum tokenizer_type {
+  NO_ACTION,
+  REGULAR_TOKEN,
+  SHEBANG_TOKEN,
+  SGML_TOKEN,
+};
+struct tokenizer_extra {
+  char *token;
+  enum tokenizer_type type;
+};

data/ext/linguist/tokenizer.l ADDED

@@ -0,0 +1,119 @@
+%{
+#include "linguist.h"
+/* Publish a rule's result through the scanner's extra data: TOK is stored
+ * in yyextra->token first, then TYP in yyextra->type. */
+#define feed_token(tok, typ) \
+  do { \
+    yyextra->token = (tok); \
+    yyextra->type = (typ); \
+  } while (0)
+/* Consume input up to and including the next newline. Reaching EOF first
+ * ends the scan through yyterminate(). */
+#define eat_until_eol() do { \
+    int eol_ch; \
+    for (;;) { \
+      eol_ch = input(yyscanner); \
+      if (eol_ch == '\n' || eol_ch == EOF) \
+        break; \
+    } \
+    if (eol_ch == EOF) \
+      yyterminate(); \
+  } while (0)
+/*
+ * Consume input until the first of: an unescaped occurrence of the
+ * character Q, a newline, or EOF. A backslash swallows the character that
+ * follows it, so an escaped Q or an escaped newline does not stop the
+ * loop. Reaching EOF, also directly after a backslash, ends the scan
+ * through yyterminate().
+ *
+ * Q is parenthesised and the local has a macro-specific name so that any
+ * argument expression, including one that mentions a variable called c,
+ * is evaluated as written.
+ */
+#define eat_until_unescaped(q) do { \
+    int unesc_ch; \
+    while ((unesc_ch = input(yyscanner)) != EOF) { \
+      if (unesc_ch == '\n') \
+        break; \
+      if (unesc_ch == '\\') { \
+        unesc_ch = input(yyscanner); \
+        if (unesc_ch == EOF) \
+          yyterminate(); \
+      } else if (unesc_ch == (q)) \
+        break; \
+    } \
+    if (unesc_ch == EOF) \
+      yyterminate(); \
+  } while (0)
+%}
+%option never-interactive yywrap reentrant nounput warn nodefault header-file="lex.linguist_yy.h" extra-type="struct tokenizer_extra *" prefix="linguist_yy"
+%x sgml c_comment xml_comment haskell_comment ocaml_comment python_dcomment python_scomment
+%%
+^#![ \t]*([[:alnum:]_\/]*\/)?env([ \t]+([^ \t=]*=[^ \t]*))*[ \t]+[[:alpha:]_]+ {
+    /* Shebang that goes through env, optionally with VAR=value
+     * assignments. The interpreter name is the final run of the match,
+     * and the pattern separates it with spaces or tabs, so walk back to
+     * the last blank of either kind. Searching for a space only would
+     * return the whole match, including the shebang prefix, when a tab
+     * is the separator. */
+    const char *off = yytext + yyleng;
+    while (off > yytext && off[-1] != ' ' && off[-1] != '\t')
+      --off;
+    feed_token(strdup(off), SHEBANG_TOKEN);
+    /* The rest of the line carries no further tokens. */
+    eat_until_eol();
+    return 1;
+  }
+^#![ \t]*[[:alpha:]_\/]+ {
+    /* Direct shebang: the token is the last path component. Without any
+     * slash the interpreter starts after the two-character shebang prefix
+     * and the blanks that follow it. Starting at yytext instead would
+     * leave that prefix inside the token, and the consumer prepends its
+     * own marker on top of it. */
+    const char *off = strrchr(yytext, '/');
+    if (off) {
+      ++off;
+    } else {
+      off = yytext + 2;
+      while (*off == ' ' || *off == '\t')
+        ++off;
+    }
+    if (strcmp(off, "env") == 0) {
+      /* A bare env with nothing usable after it yields no token. */
+      eat_until_eol();
+    } else {
+      feed_token(strdup(off), SHEBANG_TOKEN);
+      eat_until_eol();
+      return 1;
+    }
+  }
+^[ \t]*(\/\/|--|\#|%|\")" ".*   { /* nothing: a line that opens with a comment marker followed by a space is dropped through end of line */ }
+"/*"                              { BEGIN(c_comment); /* switch to the c_comment start condition */ }
+  /* See below for xml_comment start. */
+"{-"                              { BEGIN(haskell_comment); /* switch to the haskell_comment start condition */ }
+"(*"                              { BEGIN(ocaml_comment); /* switch to the ocaml_comment start condition */ }
+"\"\"\""                          { BEGIN(python_dcomment); /* switch to the python_dcomment start condition */ }
+"'''"                             { BEGIN(python_scomment); /* switch to the python_scomment start condition */ }
+<c_comment,xml_comment,haskell_comment,ocaml_comment,python_dcomment,python_scomment>.|\n { /* nothing: inside any comment state, discard one character at a time */ }
+<c_comment>"*/"                   { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+<xml_comment>"-->"                { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+<haskell_comment>"-}"             { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+<ocaml_comment>"*)"               { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+<python_dcomment>"\"\"\""         { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+<python_scomment>"'''"            { BEGIN(INITIAL); /* closing delimiter: back to INITIAL */ }
+\"\"|''                           { /* nothing: an empty string literal produces no token */ }
+\"                                { eat_until_unescaped('"'); /* skip the string body up to the closing double quote, a newline or EOF */ }
+'                                 { eat_until_unescaped('\''); /* skip the string body up to the closing single quote, a newline or EOF */ }
+(0x[0-9a-fA-F]([0-9a-fA-F]|\.)*|[0-9]([0-9]|\.)*)([uU][lL]{0,2}|([eE][-+][0-9]*)?[fFlL]*) { /* nothing: hex or decimal numeric literals, with optional suffix or exponent, produce no token */ }
+\<[^ \t\n\r<>]+/>|" "[^<>\n]{0,2048}>               {
+    /* yytext holds the opening angle bracket plus the tag name; the part
+     * after the slash in the pattern is trailing context and is not
+     * consumed. */
+    if (strcmp(yytext, "<!--") != 0) {
+      /* Ordinary tag: report it and scan its attributes in the sgml state. */
+      feed_token(strdup(yytext), SGML_TOKEN);
+      BEGIN(sgml);
+      return 1;
+    }
+    /* Comment opener: no token, skip ahead in the xml_comment state. */
+    BEGIN(xml_comment);
+  }
+<sgml>[[:alnum:]_]+=/\"           { /* attribute with a double-quoted value: emit the name plus equals sign, then skip the opening quote and the value */ feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('"'); return 1; }
+<sgml>[[:alnum:]_]+=/'            { /* attribute with a single-quoted value: same handling */ feed_token(strdup(yytext), REGULAR_TOKEN); input(yyscanner); eat_until_unescaped('\''); return 1; }
+<sgml>[[:alnum:]_]+=[[:alnum:]_]* {
+    /* Attribute with an unquoted value: keep only the text up to and
+     * including the equals sign. The copy is checked before it is cut so
+     * that a failed strdup is handed on as a null token instead of being
+     * dereferenced here. */
+    char *attr = strdup(yytext);
+    if (attr)
+      *(strchr(attr, '=') + 1) = 0;
+    feed_token(attr, REGULAR_TOKEN);
+    return 1;
+  }
+<sgml>[[:alnum:]_]+               { /* attribute name without a value */ feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }
+<sgml>\>                          { BEGIN(INITIAL); /* end of tag: back to INITIAL */ }
+<sgml>.|\n                        { /* nothing: any other character inside a tag is dropped */ }
+;|\{|\}|\(|\)|\[|\]               { /* punctuation is a token by itself */ feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }
+[[:alnum:]_.@#/*]+                {
+    /* A word. Because slash and star belong to the character class, a
+     * match can begin with a C comment opener; such a match is never a
+     * token. */
+    const size_t word_len = strlen(yytext);
+    const int opens_comment = (strncmp(yytext, "/*", 2) == 0);
+    if (!opens_comment) {
+      feed_token(strdup(yytext), REGULAR_TOKEN);
+      return 1;
+    }
+    /* Stay in INITIAL only when the same match also closes the comment,
+     * which needs at least four characters; otherwise the comment
+     * continues and c_comment takes over. */
+    if (word_len < 4 || strcmp(yytext + word_len - 2, "*/") != 0) {
+      BEGIN(c_comment);
+    }
+  }
+\<\<?|\+|\-|\*|\/|%|&&?|\|\|?     { /* operators are tokens */ feed_token(strdup(yytext), REGULAR_TOKEN); return 1; }
+.|\n                              { /* nothing: every other character is ignored */ }
+%%