file-lang-map 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mykhailo Onikiienko
4
+
5
+ Permission is hereby granted, free of charge, to any person
6
+ obtaining a copy of this software and associated documentation
7
+ files (the "Software"), to deal in the Software without
8
+ restriction, including without limitation the rights to use,
9
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the
11
+ Software is furnished to do so, subject to the following
12
+ conditions:
13
+
14
+ The above copyright notice and this permission notice shall be
15
+ included in all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
+ OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,150 @@
1
+ # file-lang-map
2
+
3
+ **Fast, zero-dependency way to identify programming languages from filenames and extensions.**
4
+
5
+ ### Why
6
+
7
+ Most language detection libraries are either too heavy or too slow (looping over arrays).
8
+ `file-lang-map` pre-indexes GitHub
9
+ Linguist [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) data into optimized
10
+ hash maps, ensuring instant lookups with a tiny footprint.
11
+
12
+ ## Features
13
+
14
+ - **O(1) Performance:** Lookups are instant, regardless of how many languages exist.
15
+ - **Browser Ready:** Zero dependencies (no `fs`, no `path`). Works in Vite, Next.js, React, Vue.
16
+ - **Collision Aware:** Correctly handles ambiguous extensions by returning every candidate (e.g., `.rs` returns "RenderScript", "Rust", and "XML").
17
+ - **Auto-Updated:** Data is fetched directly from GitHub Linguist sources.
18
+ - **Tiny:** Tree-shakable. Only load what you use.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ npm install file-lang-map
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ### 1. Identify Language by Filename
29
+
30
+ Handles full paths, exact filenames, and extensions. Returns an array of language names.
31
+
32
+ ```typescript
33
+ import {getLanguageByFileName} from 'file-lang-map';
34
+
35
+ // Standard extensions
36
+ const langs = getLanguageByFileName('src/app/main.js');
37
+ console.log(langs); // ["JavaScript"]
38
+
39
+ // Exact filenames
40
+ const docker = getLanguageByFileName('Dockerfile');
41
+ console.log(docker); // ["Dockerfile"]
42
+
43
+ // Ambiguous extensions (Collision handling)
44
+ const candidates = getLanguageByFileName('lib.rs');
45
+ // Returns: ['RenderScript', 'Rust', 'XML']
46
+ ```
47
+
48
+ ### 2. Filter by Type
49
+
50
+ You can pass a second argument to filter results immediately (e.g., only "programming" languages).
51
+
52
+ ```typescript
53
+ // .h can be C, C++, or Objective-C (all 'programming')
54
+ const headers = getLanguageByFileName('header.h', 'programming');
55
+ // Returns: [ 'C', 'C++', 'Objective-C' ]
56
+
57
+ // .ts can be TypeScript and XML (XML is "data" type)
58
+ const typescript = getLanguageByFileName('path/to/index.ts', 'programming');
59
+ // Returns: [ 'TypeScript' ]
60
+
61
+ // .json is 'data', so filtering by 'programming' returns null
62
+ const json = getLanguageByFileName('data.json', 'programming');
63
+ // Returns: null
64
+ ```
65
+
66
+ ### 3. Get Language Metadata
67
+
68
+ Lookup full language details by name (case-insensitive).
69
+
70
+ ```typescript
71
+ import {getLanguage} from 'file-lang-map';
72
+
73
+ const lang = getLanguage('javascript');
74
+ /*
75
+ {
76
+ name: 'JavaScript',
77
+ type: 'programming',
78
+ extensions: [
79
+ '.js', '._js', '.bones',
80
+ '.cjs', '.es', '.es6',
81
+ '.frag', '.gs', '.jake',
82
+ '.javascript', '.jsb', '.jscad',
83
+ '.jsfl', '.jslib', '.jsm',
84
+ '.jspre', '.jss', '.jsx',
85
+ '.mjs', '.njs', '.pac',
86
+ '.sjs', '.ssjs', '.xsjs',
87
+ '.xsjslib'
88
+ ],
89
+ filenames: [ 'Jakefile' ]
90
+ }
91
+ */
92
+ ```
93
+
94
+ ### 4. Get All Languages by Type
95
+
96
+ Useful for filtering lists or building dropdowns.
97
+
98
+ ```typescript
99
+ import {getLanguagesByType} from 'file-lang-map';
100
+
101
+ const programmingLangs = getLanguagesByType('programming');
102
+ // Returns: [ { name: "JavaScript", type: "programming", ... }, { name: "Python"... }, ... ]
103
+ ```
104
+
105
+ ## API Reference
106
+
107
+ ### `getLanguageByFileName(fileName: string, typeFilter?: LanguageType): string[] | null`
108
+
109
+ Returns an **array of strings** (names).
110
+
111
+ - **fileName**: Can be a full path, a relative path, or just a filename.
112
+ - **typeFilter**: (Optional) Filter by `'programming' | 'data' | 'markup' | 'prose'`.
113
+ - **Returns**: Array of names or `null` if not found.
114
+
115
+ ### `getLanguage(name: string): Language | null`
116
+
117
+ Look up a full language object by name. Case-insensitive (`"python"`, `"Python"` work).
118
+
119
+ - **name**: language name
120
+ - **Returns**: The matching language object, or `null` if no language has that name.
121
+
122
+ ### `getLanguagesByType(type: LanguageType): Language[]`
123
+
124
+ Get all full language objects belonging to a specific type.
125
+
126
+ - **type**: `'programming' | 'data' | 'markup' | 'prose'`.
127
+ - **Returns**: Array of language objects.
128
+
129
+ ## Contributing
130
+
131
+ This package is **self-updating**. The data is fetched from GitHub Linguist automatically.
132
+ To refresh the data locally:
133
+
134
+ ```bash
135
+ npm run generate
136
+ ```
137
+
138
+ ## How Self-Updating Works
139
+
140
+ The project uses a `linguist-lock.json` file to track the state of the upstream `languages.yml` (sha256 hash of
141
+ `languages.yml`).
142
+
143
+ - When you or CI/CD run `npm run generate`, it downloads the latest data and calculates a hash.
144
+ - If the hash differs from `linguist-lock.json`, the lock file is updated.
145
+ - The CI/CD pipeline (`.github/workflows/update-and-publish.yml`) checks for changes in `linguist-lock.json` to decide
146
+ whether to release a new version.
147
+
148
+ ## License
149
+
150
+ MIT
@@ -0,0 +1,121 @@
1
+ /**
2
+ * The categories used by GitHub Linguist.
3
+ */
4
+ type LanguageType = 'data' | 'programming' | 'markup' | 'prose';
5
+ /**
6
+ * The Language Object returned by the API.
7
+ */
8
+ interface Language {
9
+ /**
10
+ * The pretty display name of the language (e.g., "C++", "JavaScript", "JSON").
11
+ * Use this for UI labels.
12
+ */
13
+ name: string;
14
+ /**
15
+ * The category of the language.
16
+ */
17
+ type: LanguageType;
18
+ /**
19
+ * List of file extensions associated with this language (e.g., [".js", ".mjs"]).
20
+ */
21
+ extensions: string[];
22
+ /**
23
+ * List of specific filenames associated with this language (e.g., ["Jenkinsfile"]).
24
+ */
25
+ filenames: string[];
26
+ /**
27
+ * The parent group name, if applicable (e.g., "Shell" for "Alpine Abuild", "TypeScript" for "TSX").
28
+ * Note: This refers to Linguist's inheritance grouping, not the 'type'.
29
+ */
30
+ group?: string;
31
+ }
32
+
33
+ /**
34
+ * Get full language metadata by name (case-insensitive lookup).
35
+ *
36
+ * @param {string} languageName - The name of the language (e.g., "JavaScript", "python", "TypeScript")
37
+ * @returns {Language | null} The Language object with metadata, or null if not found
38
+ *
39
+ * @example
40
+ * const lang = getLanguage('javascript');
41
+ * // => { name: 'JavaScript', type: 'programming', extensions: ['.js', '.mjs', ...], filenames: [...] }
42
+ *
43
+ * @example
44
+ * const lang = getLanguage('PyThOn'); // Case-insensitive
45
+ * // => { name: 'Python', type: 'programming', extensions: ['.py', ...], filenames: [...] }
46
+ *
47
+ * @example
48
+ * const lang = getLanguage('UnknownLang');
49
+ * // => null
50
+ */
51
+ declare function getLanguage(languageName: string): Language | null;
52
+ /**
53
+ * Get all languages belonging to a specific type category.
54
+ * Returns an array of full Language objects, not just names.
55
+ *
56
+ * @param {LanguageType} type - The category: 'programming' | 'data' | 'markup' | 'prose'
57
+ * @returns {Language[]} Array of Language objects matching the type, or empty array if none found
58
+ *
59
+ * @example
60
+ * const programmingLangs = getLanguagesByType('programming');
61
+ * // => [{ name: 'JavaScript', type: 'programming', ... }, { name: 'Python', ... }, ...]
62
+ *
63
+ * @example
64
+ * const dataLangs = getLanguagesByType('data');
65
+ * // => [{ name: 'JSON', type: 'data', ... }, { name: 'YAML', ... }, ...]
66
+ *
67
+ * @example
68
+ * const markupLangs = getLanguagesByType('markup');
69
+ * // => [{ name: 'HTML', type: 'markup', ... }, { name: 'XML', ... }, ...]
70
+ *
71
+ * @example
72
+ * // @ts-ignore - invalid type
73
+ * const invalid = getLanguagesByType('invalid');
74
+ * // => []
75
+ */
76
+ declare function getLanguagesByType(type: LanguageType): Language[];
77
+ /**
78
+ * Get potential language names for a given file path or filename.
79
+ * Returns an array of language names because some extensions map to multiple languages (e.g., .rs → Rust, RenderScript).
80
+ * Handles full paths, relative paths, exact filenames (like "Dockerfile"), and extensions.
81
+ *
82
+ * @param {string} fileName - File path, relative path, or filename (e.g., "main.ts", "/src/app/main.ts", "Dockerfile")
83
+ * @param {LanguageType} [typeFilter] - Optional filter by category: 'programming' | 'data' | 'markup' | 'prose'
84
+ * @returns {string[] | null} Array of language names (e.g., ['JavaScript']) or null if no match found
85
+ *
86
+ * @example
87
+ * getLanguageByFileName('main.js')
88
+ * // => ['JavaScript']
89
+ *
90
+ * @example
91
+ * getLanguageByFileName('.gitignore')
92
+ * // => ['Ignore List']
93
+ *
94
+ * @example
95
+ * // Ambiguous extension - returns multiple candidates
96
+ * getLanguageByFileName('shader.rs')
97
+ * // => ['RenderScript', 'Rust', 'XML']
98
+ *
99
+ * @example
100
+ * // Windows path with backslashes
101
+ * getLanguageByFileName('C:\\Users\\Dev\\project\\style.css')
102
+ * // => ['CSS']
103
+ *
104
+ * @example
105
+ * // Filter excludes non-matching types
106
+ * getLanguageByFileName('data.json', 'programming')
107
+ * // => null (JSON is 'data' type, not 'programming')
108
+ *
109
+ * @example
110
+ * // Unknown extension
111
+ * getLanguageByFileName('file.unknownext')
112
+ * // => null
113
+ *
114
+ * @example
115
+ * // File without extension (unless exact match)
116
+ * getLanguageByFileName('random_file_no_ext')
117
+ * // => null
118
+ */
119
+ declare function getLanguageByFileName(fileName: string, typeFilter?: LanguageType): string[] | null;
120
+
121
+ export { getLanguage, getLanguageByFileName, getLanguagesByType };
@@ -0,0 +1,121 @@
1
+ /**
2
+ * The categories used by GitHub Linguist.
3
+ */
4
+ type LanguageType = 'data' | 'programming' | 'markup' | 'prose';
5
+ /**
6
+ * The Language Object returned by the API.
7
+ */
8
+ interface Language {
9
+ /**
10
+ * The pretty display name of the language (e.g., "C++", "JavaScript", "JSON").
11
+ * Use this for UI labels.
12
+ */
13
+ name: string;
14
+ /**
15
+ * The category of the language.
16
+ */
17
+ type: LanguageType;
18
+ /**
19
+ * List of file extensions associated with this language (e.g., [".js", ".mjs"]).
20
+ */
21
+ extensions: string[];
22
+ /**
23
+ * List of specific filenames associated with this language (e.g., ["Jenkinsfile"]).
24
+ */
25
+ filenames: string[];
26
+ /**
27
+ * The parent group name, if applicable (e.g., "Shell" for "Alpine Abuild", "TypeScript" for "TSX").
28
+ * Note: This refers to Linguist's inheritance grouping, not the 'type'.
29
+ */
30
+ group?: string;
31
+ }
32
+
33
+ /**
34
+ * Get full language metadata by name (case-insensitive lookup).
35
+ *
36
+ * @param {string} languageName - The name of the language (e.g., "JavaScript", "python", "TypeScript")
37
+ * @returns {Language | null} The Language object with metadata, or null if not found
38
+ *
39
+ * @example
40
+ * const lang = getLanguage('javascript');
41
+ * // => { name: 'JavaScript', type: 'programming', extensions: ['.js', '.mjs', ...], filenames: [...] }
42
+ *
43
+ * @example
44
+ * const lang = getLanguage('PyThOn'); // Case-insensitive
45
+ * // => { name: 'Python', type: 'programming', extensions: ['.py', ...], filenames: [...] }
46
+ *
47
+ * @example
48
+ * const lang = getLanguage('UnknownLang');
49
+ * // => null
50
+ */
51
+ declare function getLanguage(languageName: string): Language | null;
52
+ /**
53
+ * Get all languages belonging to a specific type category.
54
+ * Returns an array of full Language objects, not just names.
55
+ *
56
+ * @param {LanguageType} type - The category: 'programming' | 'data' | 'markup' | 'prose'
57
+ * @returns {Language[]} Array of Language objects matching the type, or empty array if none found
58
+ *
59
+ * @example
60
+ * const programmingLangs = getLanguagesByType('programming');
61
+ * // => [{ name: 'JavaScript', type: 'programming', ... }, { name: 'Python', ... }, ...]
62
+ *
63
+ * @example
64
+ * const dataLangs = getLanguagesByType('data');
65
+ * // => [{ name: 'JSON', type: 'data', ... }, { name: 'YAML', ... }, ...]
66
+ *
67
+ * @example
68
+ * const markupLangs = getLanguagesByType('markup');
69
+ * // => [{ name: 'HTML', type: 'markup', ... }, { name: 'XML', ... }, ...]
70
+ *
71
+ * @example
72
+ * // @ts-ignore - invalid type
73
+ * const invalid = getLanguagesByType('invalid');
74
+ * // => []
75
+ */
76
+ declare function getLanguagesByType(type: LanguageType): Language[];
77
+ /**
78
+ * Get potential language names for a given file path or filename.
79
+ * Returns an array of language names because some extensions map to multiple languages (e.g., .rs → Rust, RenderScript).
80
+ * Handles full paths, relative paths, exact filenames (like "Dockerfile"), and extensions.
81
+ *
82
+ * @param {string} fileName - File path, relative path, or filename (e.g., "main.ts", "/src/app/main.ts", "Dockerfile")
83
+ * @param {LanguageType} [typeFilter] - Optional filter by category: 'programming' | 'data' | 'markup' | 'prose'
84
+ * @returns {string[] | null} Array of language names (e.g., ['JavaScript']) or null if no match found
85
+ *
86
+ * @example
87
+ * getLanguageByFileName('main.js')
88
+ * // => ['JavaScript']
89
+ *
90
+ * @example
91
+ * getLanguageByFileName('.gitignore')
92
+ * // => ['Ignore List']
93
+ *
94
+ * @example
95
+ * // Ambiguous extension - returns multiple candidates
96
+ * getLanguageByFileName('shader.rs')
97
+ * // => ['RenderScript', 'Rust', 'XML']
98
+ *
99
+ * @example
100
+ * // Windows path with backslashes
101
+ * getLanguageByFileName('C:\\Users\\Dev\\project\\style.css')
102
+ * // => ['CSS']
103
+ *
104
+ * @example
105
+ * // Filter excludes non-matching types
106
+ * getLanguageByFileName('data.json', 'programming')
107
+ * // => null (JSON is 'data' type, not 'programming')
108
+ *
109
+ * @example
110
+ * // Unknown extension
111
+ * getLanguageByFileName('file.unknownext')
112
+ * // => null
113
+ *
114
+ * @example
115
+ * // File without extension (unless exact match)
116
+ * getLanguageByFileName('random_file_no_ext')
117
+ * // => null
118
+ */
119
+ declare function getLanguageByFileName(fileName: string, typeFilter?: LanguageType): string[] | null;
120
+
121
+ export { getLanguage, getLanguageByFileName, getLanguagesByType };