odmieniacz.js 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/index.d.ts +121 -0
- package/index.js +242 -0
- package/package.json +34 -0
- package/scripts/smoke.js +21 -0
package/README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# odmieniacz.js
|
|
2
|
+
|
|
3
|
+
Simple Polish declension (cases) API powered by [`morfeusz2-node-bindings`](https://www.npmjs.com/package/morfeusz2-node-bindings).
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
```js
|
|
8
|
+
const odm = require("odmieniacz.js");
|
|
9
|
+
|
|
10
|
+
odm.dative("Marcin"); // "Marcinowi"
|
|
11
|
+
odm.vocative("Marcin"); // "Marcinie"
|
|
12
|
+
|
|
13
|
+
odm.accusative("Marcin"); // "Marcina"
|
|
14
|
+
odm.cases("Marcin");
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Notes
|
|
18
|
+
|
|
19
|
+
- This library is sync-only.
|
|
20
|
+
- This package is **heavy** (native Morfeusz2 + dictionary) and is intended for **server-side** usage (Node.js), not the browser.
|
|
21
|
+
- It inflects **single tokens** (one word at a time).
|
|
22
|
+
- Under the hood it:
|
|
23
|
+
1) runs Morfeusz analysis to pick the best interpretation (`lemma` + `tag`)
|
|
24
|
+
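2) calls Morfeusz synthesis (`generate` with tags) to list every form of that lemma
|
|
25
|
+
3) keeps the forms whose tag carries the target case (`nom/gen/dat/acc/inst/loc/voc`), preferring those that also match the number, part of speech and gender of the analysed word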
|
|
26
|
+
|
|
27
|
+
If you need full control, you can pass `opts.lemma`/`opts.tag`, or call `analyse()` / `generateTagged()` directly.
|
package/index.d.ts
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
export type CaseName =
|
|
2
|
+
| "nominative"
|
|
3
|
+
| "genitive"
|
|
4
|
+
| "dative"
|
|
5
|
+
| "accusative"
|
|
6
|
+
| "instrumental"
|
|
7
|
+
| "locative"
|
|
8
|
+
| "vocative"
|
|
9
|
+
| "mianownik"
|
|
10
|
+
| "dopelniacz"
|
|
11
|
+
| "dopełniacz"
|
|
12
|
+
| "celownik"
|
|
13
|
+
| "biernik"
|
|
14
|
+
| "narzednik"
|
|
15
|
+
| "narzędnik"
|
|
16
|
+
| "miejscownik"
|
|
17
|
+
| "wolacz"
|
|
18
|
+
| "wołacz";
|
|
19
|
+
|
|
20
|
+
export type CaseCode = "nom" | "gen" | "dat" | "acc" | "inst" | "loc" | "voc";
|
|
21
|
+
|
|
22
|
+
export interface MorfeuszVersion {
|
|
23
|
+
addon: string;
|
|
24
|
+
napi: number;
|
|
25
|
+
morfeusz?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface AnalyseOpts {
|
|
29
|
+
returnGraph?: boolean;
|
|
30
|
+
keepWhitespace?: boolean;
|
|
31
|
+
caseSensitiveLemmas?: boolean;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export interface Interpretation {
|
|
35
|
+
lemma: string;
|
|
36
|
+
tag: string;
|
|
37
|
+
name?: string;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export interface AnalysisToken {
|
|
41
|
+
token: string;
|
|
42
|
+
start: number;
|
|
43
|
+
end: number;
|
|
44
|
+
interpretations: Interpretation[];
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export interface AnalysisGraphEdge {
|
|
48
|
+
from: number;
|
|
49
|
+
to: number;
|
|
50
|
+
orth: string;
|
|
51
|
+
interp: Interpretation;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
export interface AnalysisGraph {
|
|
55
|
+
nodes: number;
|
|
56
|
+
edges: AnalysisGraphEdge[];
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export interface AnalysisResult {
|
|
60
|
+
tokens: AnalysisToken[];
|
|
61
|
+
graph?: AnalysisGraph;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
export interface GenerateOpts {
|
|
65
|
+
includeTags?: boolean;
|
|
66
|
+
dedupe?: boolean;
|
|
67
|
+
returnAllByLemmaVariant?: boolean;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export type GenerateAllResult = string[] | Array<{ form: string; tag: string; name?: string }>;
|
|
71
|
+
|
|
72
|
+
export interface DictionaryInfo {
|
|
73
|
+
embedded: boolean;
|
|
74
|
+
defaultDir?: string;
|
|
75
|
+
current?: string;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
export interface InflectOpts {
|
|
79
|
+
lemma?: string;
|
|
80
|
+
tag?: string;
|
|
81
|
+
dedupe?: boolean;
|
|
82
|
+
all?: boolean;
|
|
83
|
+
analyseOpts?: Omit<AnalyseOpts, "returnGraph">;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
 * Result of `cases()`: one entry per grammatical case.
 *
 * Each entry is a single form by default. When `opts.all` is set, every entry
 * is an array of all matching forms instead, because the same options object
 * is used for each per-case lookup.
 */
export interface AllCases {
  nominative: string | string[];
  genitive: string | string[];
  dative: string | string[];
  accusative: string | string[];
  instrumental: string | string[];
  locative: string | string[];
  vocative: string | string[];
}
|
|
95
|
+
|
|
96
|
+
export function version(): MorfeuszVersion;
|
|
97
|
+
export function analyse(text: string, opts?: AnalyseOpts): AnalysisResult;
|
|
98
|
+
export function generate(lemma: string, opts?: GenerateOpts): GenerateAllResult;
|
|
99
|
+
export function generateTagged(lemma: string, tag: string, opts?: GenerateOpts): string[];
|
|
100
|
+
export function setDictionary(pathOrName: string): void;
|
|
101
|
+
export function getDictionaryInfo(): DictionaryInfo;
|
|
102
|
+
|
|
103
|
+
/**
 * Inflects a single word into the requested grammatical case.
 *
 * Declared under the local alias `caseForm` and re-exported as `case`:
 * `case` is a reserved word, so it cannot be used as the name of a function
 * declaration, but it is allowed as an export name.
 *
 * @param word - One token (no whitespace).
 * @param caseName - English or Polish case name, or a short case code.
 * @param opts - Optional lemma/tag overrides and output options.
 * @returns One form, or an array of forms when `opts.all` is set.
 */
declare function caseForm(word: string, caseName: CaseName | CaseCode, opts?: InflectOpts): string | string[];
export { caseForm as case };
|
|
104
|
+
export function cases(word: string, opts?: InflectOpts): AllCases;
|
|
105
|
+
|
|
106
|
+
export function nominative(word: string, opts?: InflectOpts): string | string[];
|
|
107
|
+
export function genitive(word: string, opts?: InflectOpts): string | string[];
|
|
108
|
+
export function dative(word: string, opts?: InflectOpts): string | string[];
|
|
109
|
+
export function accusative(word: string, opts?: InflectOpts): string | string[];
|
|
110
|
+
export function instrumental(word: string, opts?: InflectOpts): string | string[];
|
|
111
|
+
export function locative(word: string, opts?: InflectOpts): string | string[];
|
|
112
|
+
export function vocative(word: string, opts?: InflectOpts): string | string[];
|
|
113
|
+
|
|
114
|
+
export function mianownik(word: string, opts?: InflectOpts): string | string[];
|
|
115
|
+
export function dopelniacz(word: string, opts?: InflectOpts): string | string[];
|
|
116
|
+
export function celownik(word: string, opts?: InflectOpts): string | string[];
|
|
117
|
+
export function biernik(word: string, opts?: InflectOpts): string | string[];
|
|
118
|
+
export function narzednik(word: string, opts?: InflectOpts): string | string[];
|
|
119
|
+
export function miejscownik(word: string, opts?: InflectOpts): string | string[];
|
|
120
|
+
export function wolacz(word: string, opts?: InflectOpts): string | string[];
|
|
121
|
+
|
package/index.js
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const morfeusz = require("morfeusz2-node-bindings");
|
|
4
|
+
|
|
5
|
+
// Maps every accepted case name (English, Polish, and Polish without
// diacritics) to the short case code used in Morfeusz tags.
// The seven English names are listed first; normalizeCase reads the first
// seven diacritic-free keys when it builds its "Unknown case" message.
const CASE_CODE_BY_NAME = Object.freeze(
  Object.fromEntries([
    ["nominative", "nom"],
    ["genitive", "gen"],
    ["dative", "dat"],
    ["accusative", "acc"],
    ["instrumental", "inst"],
    ["locative", "loc"],
    ["vocative", "voc"],

    ["mianownik", "nom"],
    ["dopelniacz", "gen"],
    ["dopełniacz", "gen"],
    ["celownik", "dat"],
    ["biernik", "acc"],
    ["narzednik", "inst"],
    ["narzędnik", "inst"],
    ["miejscownik", "loc"],
    ["wolacz", "voc"],
    ["wołacz", "voc"]
  ])
);
|
|
25
|
+
|
|
26
|
+
// Short case codes, as they appear inside Morfeusz tags.
const CASE_CODES = new Set(["nom", "gen", "dat", "acc", "inst", "loc", "voc"]);

// Grammatical number values recognised in a tag.
const NUMBER_CODES = new Set(["sg", "pl"]);

// Gender values recognised in a tag.
// Bare "n" is included alongside n1/n2 so that neuter tags written as
// `...:n:ncol` / `...:n:col` still yield a gender.
// NOTE(review): assumes the bundled SGJP dictionary uses bare "n" — confirm
// against the dictionary shipped with the bindings.
const GENDER_CODES = new Set([
  "m1",
  "m2",
  "m3",
  "f",
  "n",
  "n1",
  "n2",
  "p1",
  "p2",
  "p3"
]);
|
|
39
|
+
|
|
40
|
+
/**
 * Guard that rejects anything that is not a primitive string.
 *
 * @param {*} value - Value to check.
 * @param {string} name - Label interpolated into the error message.
 * @throws {TypeError} When `typeof value` is not "string".
 */
function assertString(value, name) {
  const isString = typeof value === "string";
  if (isString) return;
  throw new TypeError(`${name} must be a string`);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Validates the input word and strips surrounding whitespace.
 *
 * @param {string} word - Candidate token.
 * @returns {string} The trimmed word.
 * @throws {TypeError} When `word` is not a string, is empty after trimming,
 *   or still contains whitespace after trimming.
 */
function normalizeWord(word) {
  assertString(word, "word");

  const token = word.trim();
  if (token.length === 0) {
    throw new TypeError("word must be a non-empty string");
  }

  // Any whitespace left after trimming means more than one token was passed.
  const hasInnerWhitespace = /\s/.test(token);
  if (hasInnerWhitespace) {
    throw new TypeError(
      "word must be a single token (no whitespace); pass one word at a time"
    );
  }

  return token;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolves a user-supplied case name to its short case code.
 *
 * The input is trimmed and lower-cased first. Short codes ("nom", "gen", ...)
 * are accepted as-is after that normalisation; names are looked up in
 * CASE_CODE_BY_NAME.
 *
 * @param {string} caseName - English or Polish case name, or a short code.
 * @returns {string} The short case code.
 * @throws {TypeError} When `caseName` is not a string or is not recognised.
 */
function normalizeCase(caseName) {
  assertString(caseName, "caseName");
  const key = caseName.trim().toLowerCase();

  // Short codes are already canonical once trimmed and lower-cased.
  if (CASE_CODES.has(key)) return key;

  // Own-property check: a plain `CASE_CODE_BY_NAME[key]` would also resolve
  // inherited members such as "constructor" or "toString" and return a
  // function instead of a case code.
  if (Object.prototype.hasOwnProperty.call(CASE_CODE_BY_NAME, key)) {
    return CASE_CODE_BY_NAME[key];
  }

  // Hint lists the first seven diacritic-free keys of the lookup table.
  const known = Object.keys(CASE_CODE_BY_NAME)
    .filter((k) => !k.includes("ł") && !k.includes("ó") && !k.includes("ę"))
    .slice(0, 7)
    .join(", ");
  throw new TypeError(
    `Unknown case "${caseName}". Use: ${known} (or Polish names).`
  );
}
|
|
73
|
+
|
|
74
|
+
/**
 * Scores an analysis interpretation so the most noun-like, singular,
 * nominative reading wins.
 *
 * Tag segments are separated by ":" and a segment may list alternatives
 * separated by "." (e.g. "nom.acc"); an alternative counts the same as a
 * plain segment.
 *
 * @param {{tag?: string}} interpretation - Interpretation with a tag string.
 * @returns {number} Higher is better; 0 when nothing matches.
 */
function scoreInterpretation(interpretation) {
  const parts = (interpretation.tag || "").split(":");
  const pos = parts[0];

  // True when `value` is a whole segment or one dotted alternative of it.
  const hasValue = (value) =>
    parts.some((part) => part === value || part.split(".").includes(value));

  let score = 0;
  if (pos === "subst") score += 100;
  if (pos === "depr") score += 90;
  if (pos === "adj") score += 70;
  if (hasValue("sg")) score += 10;
  if (hasValue("nom")) score += 5;
  if (hasValue("prop")) score += 2;

  return score;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Pulls the part of speech, number and gender out of a ":"-separated tag.
 *
 * Only whole segments are compared against NUMBER_CODES / GENDER_CODES, so a
 * segment listing dotted alternatives (e.g. "m1.m2.m3") is not recognised and
 * leaves the corresponding feature undefined.
 *
 * @param {string} tag - Tag string; falsy values are treated as "".
 * @returns {{pos: string, number: (string|undefined), gender: (string|undefined)}}
 */
function extractTagFeatures(tag) {
  const segments = String(tag || "").split(":");

  let number;
  let gender;
  for (const segment of segments) {
    // Keep only the first match of each kind.
    if (number === undefined && NUMBER_CODES.has(segment)) number = segment;
    if (gender === undefined && GENDER_CODES.has(segment)) gender = segment;
  }

  return { pos: segments[0] || "", number, gender };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Tells whether a tag carries the given case, either as a whole segment or as
 * one of the "."-separated alternatives inside a segment.
 *
 * @param {string} tag - ":"-separated tag; falsy values are treated as "".
 * @param {string} targetCase - Case code to look for.
 * @returns {boolean}
 */
function tagHasCase(tag, targetCase) {
  return String(tag || "")
    .split(":")
    .some(
      (segment) =>
        segment === targetCase || segment.split(".").includes(targetCase)
    );
}
|
|
106
|
+
|
|
107
|
+
/**
 * Narrows generated forms to those matching the target case and, where
 * possible, the number, part of speech and gender of the base tag.
 *
 * The case match is mandatory. The other filters are applied in the order
 * number -> part of speech -> gender, and the function falls back to the last
 * non-empty stage so that an over-strict filter never empties the result.
 *
 * Number and gender are matched like the case: a tag segment may list
 * "."-separated alternatives (e.g. "m1.m2.m3"), and any alternative counts.
 *
 * @param {Array<{form: string, tag: string}>} items - Generated forms with tags.
 * @param {{pos: string, number?: string, gender?: string}} base - Features of the analysed word.
 * @param {string} targetCase - Case code to keep.
 * @returns {Array<{form: string, tag: string}>} Matching items; empty when no item has the case.
 */
function filterGeneratedForms(items, base, targetCase) {
  // True when `value` is a whole tag segment or one dotted alternative of it.
  const hasValue = (tag, value) =>
    String(tag || "")
      .split(":")
      .some((part) => part === value || part.split(".").includes(value));

  const matchCase = items.filter((it) => hasValue(it.tag, targetCase));
  if (matchCase.length < 1) return matchCase;

  const withNumber = base.number
    ? matchCase.filter((it) => hasValue(it.tag, base.number))
    : matchCase;

  const withPos = base.pos
    ? withNumber.filter((it) => String(it.tag || "").split(":")[0] === base.pos)
    : withNumber;

  const withGender =
    base.gender && withPos.length
      ? withPos.filter((it) => hasValue(it.tag, base.gender))
      : withPos;

  // Most specific non-empty stage wins.
  if (withGender.length) return withGender;
  if (withPos.length) return withPos;
  if (withNumber.length) return withNumber;
  return matchCase;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Analyses `word` with Morfeusz and returns its highest-scoring interpretation.
 *
 * @param {string} word - Single token to analyse.
 * @param {object} [opts] - Extra options forwarded to `morfeusz.analyse`.
 * @returns {{lemma: string, tag: string}} The interpretation with the highest
 *   `scoreInterpretation` value; on a tie the earliest one wins.
 * @throws {Error} When the analysis has no tokens, or the chosen token has no
 *   interpretations.
 */
function pickBestInterpretation(word, opts) {
  // `returnGraph: false` goes last so caller-supplied options cannot turn the
  // graph back on; only `tokens` is read below.
  const analysis = morfeusz.analyse(word, { ...(opts || {}), returnGraph: false });
  if (!analysis || !Array.isArray(analysis.tokens) || analysis.tokens.length < 1) {
    throw new Error("Morfeusz returned an empty analysis result");
  }

  // Prefer the token whose surface form equals the input; otherwise take the first.
  const token = analysis.tokens.find((t) => t && t.token === word) || analysis.tokens[0];
  const interpretations = (token && token.interpretations) || [];
  if (interpretations.length < 1) {
    throw new Error(`No interpretations for "${word}"`);
  }

  // Single pass for the maximum; strict ">" keeps the earliest on ties, which
  // matches what a stable descending sort would put first.
  let best = interpretations[0];
  let bestScore = scoreInterpretation(best);
  for (let i = 1; i < interpretations.length; i += 1) {
    const candidate = interpretations[i];
    const candidateScore = scoreInterpretation(candidate);
    if (candidateScore > bestScore) {
      best = candidate;
      bestScore = candidateScore;
    }
  }
  return best;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Produces the form(s) of `word` in `targetCase`.
 *
 * The lemma and base tag come from `opts.lemma` / `opts.tag` when given;
 * whichever is missing is taken from the best analysis of `word`. All forms
 * of the lemma are then generated with tags and filtered by
 * `filterGeneratedForms`.
 *
 * @param {string} word - Normalised single token.
 * @param {string} targetCase - Short case code.
 * @param {object} [opts] - `lemma`, `tag`, `dedupe`, `all`, `analyseOpts`.
 * @returns {string|string[]} The first matching form, or all of them when
 *   `opts.all` is truthy.
 * @throws {Error} When Morfeusz returns an unusable result or no form matches.
 */
function generateOne(word, targetCase, opts) {
  const options = opts || {};

  let chosenLemma = options.lemma;
  let chosenTag = options.tag;

  // Analyse only when the caller did not pin both lemma and tag.
  if (!chosenLemma || !chosenTag) {
    const best = pickBestInterpretation(word, options.analyseOpts);
    chosenLemma = chosenLemma || best.lemma;
    chosenTag = chosenTag || best.tag;
  }

  const dedupe = options.dedupe !== false;
  const generated = morfeusz.generate(chosenLemma, {
    includeTags: true,
    dedupe,
    returnAllByLemmaVariant: true
  });

  if (!Array.isArray(generated)) {
    throw new Error("Morfeusz returned an invalid generate() result");
  }
  if (generated.length < 1) {
    throw new Error(`No generated forms for lemma "${chosenLemma}"`);
  }
  if (typeof generated[0] === "string") {
    throw new Error(
      "Morfeusz generate() returned string forms, but tag info is required; ensure bindings support includeTags:true"
    );
  }

  const base = extractTagFeatures(chosenTag);
  const matched = filterGeneratedForms(generated, base, targetCase).map((it) => it.form);

  // Once tags are dropped, entries that differed only by tag or lemma variant
  // become identical strings; honour `dedupe` on the final list too.
  // Order is preserved, so the first form is unchanged.
  const forms = dedupe ? Array.from(new Set(matched)) : matched;

  if (forms.length < 1) {
    const hint = chosenLemma === word ? "" : ` (lemma="${chosenLemma}")`;
    throw new Error(`No generated forms for "${word}" in case "${targetCase}"${hint}`);
  }

  return options.all ? forms : forms[0];
}
|
|
189
|
+
|
|
190
|
+
/**
 * Public entry point: inflects `word` into the given case.
 *
 * @param {string} word - Single token; validated and trimmed first.
 * @param {string} caseNameOrCode - Short case code, or a case name that
 *   `normalizeCase` can resolve.
 * @param {object} [opts] - Options forwarded to `generateOne`.
 * @returns {string|string[]} Whatever `generateOne` returns.
 */
function caseForm(word, caseNameOrCode, opts) {
  const token = normalizeWord(word);

  // An exact short code is used as-is; anything else goes through name lookup.
  let code = caseNameOrCode;
  if (!CASE_CODES.has(code)) {
    code = normalizeCase(code);
  }

  return generateOne(token, code, opts);
}
|
|
197
|
+
|
|
198
|
+
/**
 * Inflects `word` into all seven cases using one shared lemma and base tag.
 *
 * The word is analysed at most once, and not at all when both `opts.lemma`
 * and `opts.tag` are supplied, so a word Morfeusz cannot analyse can still be
 * inflected from an explicit lemma and tag.
 *
 * @param {string} word - Single token; validated and trimmed first.
 * @param {object} [opts] - Same options as the single-case functions.
 * @returns {object} One entry per case; each is a string, or an array of
 *   strings when `opts.all` is truthy.
 */
function allCases(word, opts) {
  const normalized = normalizeWord(word);
  const options = opts || {};

  let lemma = options.lemma;
  let tag = options.tag;
  if (!lemma || !tag) {
    const best = pickBestInterpretation(normalized, options.analyseOpts);
    lemma = lemma || best.lemma;
    tag = tag || best.tag;
  }

  // Pinning lemma and tag here keeps every per-case call from re-analysing.
  const baseOpts = { ...options, lemma, tag };
  return {
    nominative: generateOne(normalized, "nom", baseOpts),
    genitive: generateOne(normalized, "gen", baseOpts),
    dative: generateOne(normalized, "dat", baseOpts),
    accusative: generateOne(normalized, "acc", baseOpts),
    instrumental: generateOne(normalized, "inst", baseOpts),
    locative: generateOne(normalized, "loc", baseOpts),
    vocative: generateOne(normalized, "voc", baseOpts)
  };
}
|
|
215
|
+
|
|
216
|
+
module.exports = {
|
|
217
|
+
version: morfeusz.version,
|
|
218
|
+
analyse: morfeusz.analyse,
|
|
219
|
+
generate: morfeusz.generate,
|
|
220
|
+
generateTagged: morfeusz.generateTagged,
|
|
221
|
+
setDictionary: morfeusz.setDictionary,
|
|
222
|
+
getDictionaryInfo: morfeusz.getDictionaryInfo,
|
|
223
|
+
|
|
224
|
+
case: caseForm,
|
|
225
|
+
cases: allCases,
|
|
226
|
+
|
|
227
|
+
nominative: (w, o) => caseForm(w, "nom", o),
|
|
228
|
+
genitive: (w, o) => caseForm(w, "gen", o),
|
|
229
|
+
dative: (w, o) => caseForm(w, "dat", o),
|
|
230
|
+
accusative: (w, o) => caseForm(w, "acc", o),
|
|
231
|
+
instrumental: (w, o) => caseForm(w, "inst", o),
|
|
232
|
+
locative: (w, o) => caseForm(w, "loc", o),
|
|
233
|
+
vocative: (w, o) => caseForm(w, "voc", o),
|
|
234
|
+
|
|
235
|
+
mianownik: (w, o) => caseForm(w, "nom", o),
|
|
236
|
+
dopelniacz: (w, o) => caseForm(w, "gen", o),
|
|
237
|
+
celownik: (w, o) => caseForm(w, "dat", o),
|
|
238
|
+
biernik: (w, o) => caseForm(w, "acc", o),
|
|
239
|
+
narzednik: (w, o) => caseForm(w, "inst", o),
|
|
240
|
+
miejscownik: (w, o) => caseForm(w, "loc", o),
|
|
241
|
+
wolacz: (w, o) => caseForm(w, "voc", o)
|
|
242
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "odmieniacz.js",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Simple Polish declension (cases) API powered by Morfeusz2.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"types": "index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test": "node scripts/smoke.js"
|
|
9
|
+
},
|
|
10
|
+
"dependencies": {
|
|
11
|
+
"morfeusz2-node-bindings": "^0.1.2"
|
|
12
|
+
},
|
|
13
|
+
"overrides": {
|
|
14
|
+
"tar": "7.5.3"
|
|
15
|
+
},
|
|
16
|
+
"engines": {
|
|
17
|
+
"node": ">=18"
|
|
18
|
+
},
|
|
19
|
+
"files": [
|
|
20
|
+
"index.js",
|
|
21
|
+
"index.d.ts",
|
|
22
|
+
"README.md",
|
|
23
|
+
"scripts/"
|
|
24
|
+
],
|
|
25
|
+
"keywords": [
|
|
26
|
+
"polish",
|
|
27
|
+
"morphology",
|
|
28
|
+
"declension",
|
|
29
|
+
"inflection",
|
|
30
|
+
"morfeusz",
|
|
31
|
+
"morfeusz2"
|
|
32
|
+
],
|
|
33
|
+
"license": "MIT"
|
|
34
|
+
}
|
package/scripts/smoke.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const assert = require("node:assert/strict");
|
|
4
|
+
const odm = require("..");
|
|
5
|
+
|
|
6
|
+
// Smoke test: inflect "Marcin" into three cases and check the expected forms.
console.log("version()", odm.version());

// [exported function name, form that must be produced]
const expectations = [
  ["dative", "Marcinowi"],
  ["vocative", "Marcinie"],
  ["accusative", "Marcina"]
];

// Run every inflection first, so a failure in any of them surfaces before
// anything is printed.
const results = expectations.map(([method]) => odm[method]("Marcin", { all: true }));

expectations.forEach(([method], index) => {
  console.log(`${method}(Marcin)`, results[index]);
});

expectations.forEach(([, expected], index) => {
  const actual = results[index];
  const matches = Array.isArray(actual) ? actual.includes(expected) : actual === expected;
  assert.ok(matches);
});

console.log("OK");
|
|
21
|
+
|