file-lang-map 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,21 +1,22 @@
1
1
  # file-lang-map
2
2
 
3
- **Fast, zero-dependency way to identify programming languages from filenames and extensions.**
3
+ **Fast, zero-dependency way to identify programming languages from paths, filenames, and extensions.**
4
4
 
5
- ### Why
5
+ ## Why
6
6
 
7
- Most language detection libraries are either too heavy or too slow (looping over arrays).
8
- `file-lang-map` pre-indexes GitHub
9
- Linguist [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) data into optimized
10
- hash maps, ensuring instant lookups with a tiny footprint.
7
+ Some language detectors can be heavy or rely on linear scans. `file-lang-map` pre-indexes GitHub
8
+ Linguist [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) into compact lookup
9
+ maps, giving near-instant lookups, a small bundle size, and zero runtime dependencies.
11
10
 
12
11
  ## Features
13
12
 
14
- - **O(1) Performance:** Lookups are instant, regardless of how many languages exist.
15
- - **Browser Ready:** Zero dependencies (no `fs`, no `path`). Works in Vite, Next.js, React, Vue.
16
- - **Collision Aware:** Correctly handles ambiguous extensions (e.g., `.rs` returns both "Rust" and "RenderScript").
17
- - **Auto-Updated:** Data is fetched directly from GitHub Linguist sources.
18
- - **Tiny:** Tree-shakable. Only load what you use.
13
+ - **O(1) (average-case) Performance:** Lookups are instant, regardless of how many languages exist.
14
+ - **Browser Ready:** Zero runtime dependencies. Works in browser and Node.js.
15
+ - **TypeScript Support:** Includes built-in type definitions.
16
+ - **Flexible:** Works with absolute and relative paths, filenames, or just extensions, on all platforms.
17
+ - **Tiny:** Tree-shakable. Only load what you use. (Use named imports and a bundler that supports tree-shaking)
18
+ - **Collision Aware:** Correctly handles ambiguous extensions (e.g., `.h` returns "C", "C++" and "Objective-C").
19
+ - **Auto-Updated:** Data is refreshed weekly from GitHub Linguist sources via GitHub Actions.
19
20
 
20
21
  ## Installation
21
22
 
@@ -23,11 +24,45 @@ hash maps, ensuring instant lookups with a tiny footprint.
23
24
  npm install file-lang-map
24
25
  ```
25
26
 
26
- ## Usage
27
+ ## Quick Start
28
+
29
+ ```typescript
30
+ import {getLanguageByFileName, getLanguage, getLanguagesByType} from 'file-lang-map';
31
+
32
+ // Get all possible languages from filename (may return null if unknown)
33
+ const languages = getLanguageByFileName('path/to/file.js');
34
+ if (languages === null) {
35
+ console.log('not found')
36
+ } else {
37
+ console.log(languages)
38
+ // ['JavaScript']
39
+ }
40
+
41
+ // Get language metadata by name. Case-insensitive lookup ("javascript" or "JavaScript").
42
+ const language = getLanguage('JavaScript');
43
+ /*
44
+ {
45
+ name: 'JavaScript',
46
+ type: 'programming',
47
+ extensions: ['.js', '.cjs', '.mjs', ... ], // list of all known extensions
48
+ filenames: ['Jakefile'] // list of all known filenames
49
+ }
50
+ */
51
+
52
+ // You can optionally filter results by type (e.g., only 'programming')
53
+ const prog = getLanguageByFileName('data.json', 'programming');
54
+ // prog === null because JSON is a 'data' type
55
+
56
+ // Get list of all known "programming" languages (also available - 'data', 'markup', 'prose')
57
+ const programmingLanguages = getLanguagesByType('programming');
58
+ // ['JavaScript', 'Python', 'TypeScript', 'Rust', ...]
59
+ ```
60
+
61
+ ## Examples
27
62
 
28
63
  ### 1. Identify Language by Filename
29
64
 
30
- Handles full paths, exact filenames, and extensions. Returns an array of language names.
65
+ Handles full paths (absolute or relative), exact filenames, and extensions. Returns an array of language names.
31
66
 
32
67
  ```typescript
33
68
  import {getLanguageByFileName} from 'file-lang-map';
@@ -66,10 +101,13 @@ const json = getLanguageByFileName('data.json', 'programming');
66
101
  ### 3. Get Language Metadata
67
102
 
68
103
  Lookup full language details by name (case-insensitive).
104
+ Language object includes all possible extensions for the language, name, possible filenames, and type.
69
105
 
70
106
  ```typescript
71
107
  import {getLanguage} from 'file-lang-map';
72
108
 
109
+ // Case-insensitive lookup.
110
+ // Returns language object which includes all possible extensions for the language, name...
73
111
  const lang = getLanguage('javascript');
74
112
  /*
75
113
  {
@@ -93,13 +131,16 @@ const lang = getLanguage('javascript');
93
131
 
94
132
  ### 4. Get All Languages by Type
95
133
 
96
- Useful for filtering lists or building dropdowns.
134
+ Useful for filtering lists or building dropdowns. Returns an array of language names (strings).
97
135
 
98
136
  ```typescript
99
137
  import {getLanguagesByType} from 'file-lang-map';
100
138
 
101
139
  const programmingLangs = getLanguagesByType('programming');
102
- // Returns: [ { name: "JavaScript", type: "programming", ... }, { name: "Python"... }, ... ]
140
+ // Returns: ['JavaScript', 'Python', 'TypeScript', 'Rust', ...]
141
+
142
+ const dataLangs = getLanguagesByType('data');
143
+ // Returns: ['JSON', 'YAML', 'CSV', 'TOML', ...]
103
144
  ```
104
145
 
105
146
  ## API Reference
@@ -119,14 +160,14 @@ Look up a full language object by name. Case-insensitive (`"python"`, `"Python"`
119
160
  - **name**: language name
120
161
  - **Returns**: language object
121
162
 
122
- ### `getLanguagesByType(type: LanguageType): Language[]`
163
+ ### `getLanguagesByType(type: LanguageType): string[]`
123
164
 
124
- Get all full language objects belonging to a specific type.
165
+ Get all language names belonging to a specific type.
125
166
 
126
167
  - **type**: `'programming' | 'data' | 'markup' | 'prose'`.
127
- - **Returns**: Array of language objects.
168
+ - **Returns**: Array of language names (strings).
128
169
 
129
- ## Contributing
170
+ ## Contributing and Development
130
171
 
131
172
  This package is **self-updating**. The data is fetched from GitHub Linguist automatically.
132
173
  To refresh the data locally:
@@ -135,12 +176,21 @@ To refresh the data locally:
135
176
  npm run generate
136
177
  ```
137
178
 
138
- ## How Self-Updating Works
179
+ ### Running Tests
180
+
181
+ The project has two test commands:
182
+
183
+ - **`npm test`**: Uses native Node.js glob patterns (`test/**/*.test.ts`). Requires Node.js 20.11+ or 22+.
184
+ - **`npm run test:ci`**: Uses shell `find` command for file discovery. Compatible with Node.js 18+.
185
+
186
+ The CI pipeline tests on Node 18 and 24, so `test:ci` ensures compatibility with older Node versions that don't support
187
+ glob patterns in the `--test` flag.
188
+
189
+ ### How Self-Updating Works
139
190
 
140
- The project uses a `linguist-lock.json` file to track the state of the upstream `languist.yml` (sha256 hash of
141
- linguisl.yml).
191
+ The project creates and uses a `linguist-lock.json` file to track the state of the upstream `languages.yml`.
142
192
 
143
- - When you or CI/CD run `npm run generate`, it downloads the latest data and calculates a hash.
193
+ - When the CI/CD pipeline runs `npm run generate`, it downloads the latest data and calculates a hash.
144
194
  - If the hash differs from `linguist-lock.json`, the lock file is updated.
145
195
  - The CI/CD pipeline (`.github/workflows/update-and-publish.yml`) checks for changes in `linguist-lock.json` to decide
146
196
  whether to release a new version.
package/dist/index.d.mts CHANGED
@@ -10,24 +10,24 @@ interface Language {
10
10
  * The pretty display name of the language (e.g., "C++", "JavaScript", "JSON").
11
11
  * Use this for UI labels.
12
12
  */
13
- name: string;
13
+ readonly name: string;
14
14
  /**
15
15
  * The category of the language.
16
16
  */
17
- type: LanguageType;
17
+ readonly type: LanguageType;
18
18
  /**
19
19
  * List of file extensions associated with this language (e.g., [".js", ".mjs"]).
20
20
  */
21
- extensions: string[];
21
+ readonly extensions: string[];
22
22
  /**
23
23
  * List of specific filenames associated with this language (e.g., ["Jenkinsfile"]).
24
24
  */
25
- filenames: string[];
25
+ readonly filenames: string[];
26
26
  /**
27
27
  * The parent group name, if applicable (e.g., "Shell" for "Alpine Abuild", "TypeScript" for "TSX").
28
28
  * Note: This refers to Linguist's inheritance grouping, not the 'type'.
29
29
  */
30
- group?: string;
30
+ readonly group?: string;
31
31
  }
32
32
 
33
33
  /**
@@ -51,29 +51,29 @@ interface Language {
51
51
  declare function getLanguage(languageName: string): Language | null;
52
52
  /**
53
53
  * Get all languages belonging to a specific type category.
54
- * Returns an array of full Language objects, not just names.
54
+ * Returns an array of language names (strings).
55
55
  *
56
56
  * @param {LanguageType} type - The category: 'programming' | 'data' | 'markup' | 'prose'
57
- * @returns {Language[]} Array of Language objects matching the type, or empty array if none found
57
+ * @returns {string[]} Array of language names matching the type, or empty array if none found
58
58
  *
59
59
  * @example
60
60
  * const programmingLangs = getLanguagesByType('programming');
61
- * // => [{ name: 'JavaScript', type: 'programming', ... }, { name: 'Python', ... }, ...]
61
+ * // => ['JavaScript', 'Python', 'TypeScript', 'Rust', ...]
62
62
  *
63
63
  * @example
64
64
  * const dataLangs = getLanguagesByType('data');
65
- * // => [{ name: 'JSON', type: 'data', ... }, { name: 'YAML', ... }, ...]
65
+ * // => ['JSON', 'YAML', 'CSV', ...]
66
66
  *
67
67
  * @example
68
68
  * const markupLangs = getLanguagesByType('markup');
69
- * // => [{ name: 'HTML', type: 'markup', ... }, { name: 'XML', ... }, ...]
69
+ * // => ['HTML', 'XML', 'Markdown', ...]
70
70
  *
71
71
  * @example
72
72
  * // @ts-ignore - invalid type
73
73
  * const invalid = getLanguagesByType('invalid');
74
74
  * // => []
75
75
  */
76
- declare function getLanguagesByType(type: LanguageType): Language[];
76
+ declare function getLanguagesByType(type: LanguageType): string[];
77
77
  /**
78
78
  * Get potential language names for a given file path or filename.
79
79
  * Returns an array of language names because some extensions map to multiple languages (e.g., .rs → Rust, RenderScript).
package/dist/index.d.ts CHANGED
@@ -10,24 +10,24 @@ interface Language {
10
10
  * The pretty display name of the language (e.g., "C++", "JavaScript", "JSON").
11
11
  * Use this for UI labels.
12
12
  */
13
- name: string;
13
+ readonly name: string;
14
14
  /**
15
15
  * The category of the language.
16
16
  */
17
- type: LanguageType;
17
+ readonly type: LanguageType;
18
18
  /**
19
19
  * List of file extensions associated with this language (e.g., [".js", ".mjs"]).
20
20
  */
21
- extensions: string[];
21
+ readonly extensions: string[];
22
22
  /**
23
23
  * List of specific filenames associated with this language (e.g., ["Jenkinsfile"]).
24
24
  */
25
- filenames: string[];
25
+ readonly filenames: string[];
26
26
  /**
27
27
  * The parent group name, if applicable (e.g., "Shell" for "Alpine Abuild", "TypeScript" for "TSX").
28
28
  * Note: This refers to Linguist's inheritance grouping, not the 'type'.
29
29
  */
30
- group?: string;
30
+ readonly group?: string;
31
31
  }
32
32
 
33
33
  /**
@@ -51,29 +51,29 @@ interface Language {
51
51
  declare function getLanguage(languageName: string): Language | null;
52
52
  /**
53
53
  * Get all languages belonging to a specific type category.
54
- * Returns an array of full Language objects, not just names.
54
+ * Returns an array of language names (strings).
55
55
  *
56
56
  * @param {LanguageType} type - The category: 'programming' | 'data' | 'markup' | 'prose'
57
- * @returns {Language[]} Array of Language objects matching the type, or empty array if none found
57
+ * @returns {string[]} Array of language names matching the type, or empty array if none found
58
58
  *
59
59
  * @example
60
60
  * const programmingLangs = getLanguagesByType('programming');
61
- * // => [{ name: 'JavaScript', type: 'programming', ... }, { name: 'Python', ... }, ...]
61
+ * // => ['JavaScript', 'Python', 'TypeScript', 'Rust', ...]
62
62
  *
63
63
  * @example
64
64
  * const dataLangs = getLanguagesByType('data');
65
- * // => [{ name: 'JSON', type: 'data', ... }, { name: 'YAML', ... }, ...]
65
+ * // => ['JSON', 'YAML', 'CSV', ...]
66
66
  *
67
67
  * @example
68
68
  * const markupLangs = getLanguagesByType('markup');
69
- * // => [{ name: 'HTML', type: 'markup', ... }, { name: 'XML', ... }, ...]
69
+ * // => ['HTML', 'XML', 'Markdown', ...]
70
70
  *
71
71
  * @example
72
72
  * // @ts-ignore - invalid type
73
73
  * const invalid = getLanguagesByType('invalid');
74
74
  * // => []
75
75
  */
76
- declare function getLanguagesByType(type: LanguageType): Language[];
76
+ declare function getLanguagesByType(type: LanguageType): string[];
77
77
  /**
78
78
  * Get potential language names for a given file path or filename.
79
79
  * Returns an array of language names because some extensions map to multiple languages (e.g., .rs → Rust, RenderScript).