puzlink 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +21 -0
- package/README.md +35 -0
- package/dist/data/answerLengths.d.ts +10 -0
- package/dist/data/answerLengths.d.ts.map +1 -0
- package/dist/data/answerLengths.js +63 -0
- package/dist/data/answerLengths.js.map +1 -0
- package/dist/data/categories/compass.d.ts +3 -0
- package/dist/data/categories/compass.d.ts.map +1 -0
- package/dist/data/categories/compass.js +11 -0
- package/dist/data/categories/compass.js.map +1 -0
- package/dist/data/categories/countryAlpha2.d.ts +3 -0
- package/dist/data/categories/countryAlpha2.d.ts.map +1 -0
- package/dist/data/categories/countryAlpha2.js +252 -0
- package/dist/data/categories/countryAlpha2.js.map +1 -0
- package/dist/data/categories/countryAlpha3.d.ts +3 -0
- package/dist/data/categories/countryAlpha3.d.ts.map +1 -0
- package/dist/data/categories/countryAlpha3.js +252 -0
- package/dist/data/categories/countryAlpha3.js.map +1 -0
- package/dist/data/categories/daysOfTheWeek.d.ts +3 -0
- package/dist/data/categories/daysOfTheWeek.d.ts.map +1 -0
- package/dist/data/categories/daysOfTheWeek.js +10 -0
- package/dist/data/categories/daysOfTheWeek.js.map +1 -0
- package/dist/data/categories/elementSymbols.d.ts +3 -0
- package/dist/data/categories/elementSymbols.d.ts.map +1 -0
- package/dist/data/categories/elementSymbols.js +121 -0
- package/dist/data/categories/elementSymbols.js.map +1 -0
- package/dist/data/categories/greekLetters.d.ts +3 -0
- package/dist/data/categories/greekLetters.d.ts.map +1 -0
- package/dist/data/categories/greekLetters.js +27 -0
- package/dist/data/categories/greekLetters.js.map +1 -0
- package/dist/data/categories/months.d.ts +3 -0
- package/dist/data/categories/months.d.ts.map +1 -0
- package/dist/data/categories/months.js +15 -0
- package/dist/data/categories/months.js.map +1 -0
- package/dist/data/categories/natoAlphabet.d.ts +3 -0
- package/dist/data/categories/natoAlphabet.d.ts.map +1 -0
- package/dist/data/categories/natoAlphabet.js +29 -0
- package/dist/data/categories/natoAlphabet.js.map +1 -0
- package/dist/data/categories/numbers.d.ts +3 -0
- package/dist/data/categories/numbers.d.ts.map +1 -0
- package/dist/data/categories/numbers.js +16 -0
- package/dist/data/categories/numbers.js.map +1 -0
- package/dist/data/categories/romanNumerals.d.ts +3 -0
- package/dist/data/categories/romanNumerals.d.ts.map +1 -0
- package/dist/data/categories/romanNumerals.js +134 -0
- package/dist/data/categories/romanNumerals.js.map +1 -0
- package/dist/data/categories/solfege.d.ts +3 -0
- package/dist/data/categories/solfege.d.ts.map +1 -0
- package/dist/data/categories/solfege.js +11 -0
- package/dist/data/categories/solfege.js.map +1 -0
- package/dist/data/categories/usStateAbbreviations.d.ts +3 -0
- package/dist/data/categories/usStateAbbreviations.d.ts.map +1 -0
- package/dist/data/categories/usStateAbbreviations.js +53 -0
- package/dist/data/categories/usStateAbbreviations.js.map +1 -0
- package/dist/data/categories.d.ts +10 -0
- package/dist/data/categories.d.ts.map +1 -0
- package/dist/data/categories.js +31 -0
- package/dist/data/categories.js.map +1 -0
- package/dist/data/knownLogProbs.d.ts +6 -0
- package/dist/data/knownLogProbs.d.ts.map +1 -0
- package/dist/data/knownLogProbs.js +2975 -0
- package/dist/data/knownLogProbs.js.map +1 -0
- package/dist/data/morse.d.ts +2 -0
- package/dist/data/morse.d.ts.map +1 -0
- package/dist/data/morse.js +29 -0
- package/dist/data/morse.js.map +1 -0
- package/dist/data/scrabble.d.ts +2 -0
- package/dist/data/scrabble.d.ts.map +1 -0
- package/dist/data/scrabble.js +29 -0
- package/dist/data/scrabble.js.map +1 -0
- package/dist/features/index.d.ts +32 -0
- package/dist/features/index.d.ts.map +1 -0
- package/dist/features/index.js +79 -0
- package/dist/features/index.js.map +1 -0
- package/dist/features/letterCount.d.ts +7 -0
- package/dist/features/letterCount.d.ts.map +1 -0
- package/dist/features/letterCount.js +121 -0
- package/dist/features/letterCount.js.map +1 -0
- package/dist/features/letterSequence.d.ts +7 -0
- package/dist/features/letterSequence.d.ts.map +1 -0
- package/dist/features/letterSequence.js +155 -0
- package/dist/features/letterSequence.js.map +1 -0
- package/dist/features/logProbCache.d.ts +16 -0
- package/dist/features/logProbCache.d.ts.map +1 -0
- package/dist/features/logProbCache.js +36 -0
- package/dist/features/logProbCache.js.map +1 -0
- package/dist/features/other.d.ts +4 -0
- package/dist/features/other.d.ts.map +1 -0
- package/dist/features/other.js +190 -0
- package/dist/features/other.js.map +1 -0
- package/dist/features/substring.d.ts +3 -0
- package/dist/features/substring.d.ts.map +1 -0
- package/dist/features/substring.js +146 -0
- package/dist/features/substring.js.map +1 -0
- package/dist/features/wordplay.d.ts +7 -0
- package/dist/features/wordplay.d.ts.map +1 -0
- package/dist/features/wordplay.js +387 -0
- package/dist/features/wordplay.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/affixDistribution.d.ts +26 -0
- package/dist/lib/affixDistribution.d.ts.map +1 -0
- package/dist/lib/affixDistribution.js +105 -0
- package/dist/lib/affixDistribution.js.map +1 -0
- package/dist/lib/counter.d.ts +23 -0
- package/dist/lib/counter.d.ts.map +1 -0
- package/dist/lib/counter.js +55 -0
- package/dist/lib/counter.js.map +1 -0
- package/dist/lib/distribution.d.ts +40 -0
- package/dist/lib/distribution.d.ts.map +1 -0
- package/dist/lib/distribution.js +176 -0
- package/dist/lib/distribution.js.map +1 -0
- package/dist/lib/lengthDistribution.d.ts +30 -0
- package/dist/lib/lengthDistribution.d.ts.map +1 -0
- package/dist/lib/lengthDistribution.js +137 -0
- package/dist/lib/lengthDistribution.js.map +1 -0
- package/dist/lib/letterBitset.d.ts +49 -0
- package/dist/lib/letterBitset.d.ts.map +1 -0
- package/dist/lib/letterBitset.js +101 -0
- package/dist/lib/letterBitset.js.map +1 -0
- package/dist/lib/letterDistribution.d.ts +60 -0
- package/dist/lib/letterDistribution.d.ts.map +1 -0
- package/dist/lib/letterDistribution.js +230 -0
- package/dist/lib/letterDistribution.js.map +1 -0
- package/dist/lib/letterIndices.d.ts +13 -0
- package/dist/lib/letterIndices.d.ts.map +1 -0
- package/dist/lib/letterIndices.js +41 -0
- package/dist/lib/letterIndices.js.map +1 -0
- package/dist/lib/logCounter.d.ts +23 -0
- package/dist/lib/logCounter.d.ts.map +1 -0
- package/dist/lib/logCounter.js +49 -0
- package/dist/lib/logCounter.js.map +1 -0
- package/dist/lib/logNum.d.ts +36 -0
- package/dist/lib/logNum.d.ts.map +1 -0
- package/dist/lib/logNum.js +193 -0
- package/dist/lib/logNum.js.map +1 -0
- package/dist/lib/memoize.d.ts +5 -0
- package/dist/lib/memoize.d.ts.map +1 -0
- package/dist/lib/memoize.js +104 -0
- package/dist/lib/memoize.js.map +1 -0
- package/dist/lib/util.d.ts +30 -0
- package/dist/lib/util.d.ts.map +1 -0
- package/dist/lib/util.js +111 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/wordlist.d.ts +66 -0
- package/dist/lib/wordlist.d.ts.map +1 -0
- package/dist/lib/wordlist.js +166 -0
- package/dist/lib/wordlist.js.map +1 -0
- package/dist/linkers/index.d.ts +34 -0
- package/dist/linkers/index.d.ts.map +1 -0
- package/dist/linkers/index.js +25 -0
- package/dist/linkers/index.js.map +1 -0
- package/dist/linkers/indexing.d.ts +5 -0
- package/dist/linkers/indexing.d.ts.map +1 -0
- package/dist/linkers/indexing.js +152 -0
- package/dist/linkers/indexing.js.map +1 -0
- package/dist/linkers/length.d.ts +5 -0
- package/dist/linkers/length.d.ts.map +1 -0
- package/dist/linkers/length.js +101 -0
- package/dist/linkers/length.js.map +1 -0
- package/dist/linkers/letterDistribution.d.ts +4 -0
- package/dist/linkers/letterDistribution.d.ts.map +1 -0
- package/dist/linkers/letterDistribution.js +46 -0
- package/dist/linkers/letterDistribution.js.map +1 -0
- package/dist/linkers/other.d.ts +5 -0
- package/dist/linkers/other.d.ts.map +1 -0
- package/dist/linkers/other.js +90 -0
- package/dist/linkers/other.js.map +1 -0
- package/dist/parse.d.ts +8 -0
- package/dist/parse.d.ts.map +1 -0
- package/dist/parse.js +23 -0
- package/dist/parse.js.map +1 -0
- package/dist/puzlink.d.ts +84 -0
- package/dist/puzlink.d.ts.map +1 -0
- package/dist/puzlink.js +59 -0
- package/dist/puzlink.js.map +1 -0
- package/package.json +57 -0
- package/src/data/answerLengths.ts +63 -0
- package/src/data/categories/README.md +3 -0
- package/src/data/categories/compass.ts +1 -0
- package/src/data/categories/countryAlpha2.ts +251 -0
- package/src/data/categories/countryAlpha3.ts +251 -0
- package/src/data/categories/daysOfTheWeek.ts +1 -0
- package/src/data/categories/elementSymbols.ts +120 -0
- package/src/data/categories/greekLetters.ts +26 -0
- package/src/data/categories/months.ts +14 -0
- package/src/data/categories/natoAlphabet.ts +28 -0
- package/src/data/categories/numbers.ts +15 -0
- package/src/data/categories/romanNumerals.ts +133 -0
- package/src/data/categories/solfege.ts +1 -0
- package/src/data/categories/txt/compass.txt +8 -0
- package/src/data/categories/txt/daysOfTheWeek.txt +7 -0
- package/src/data/categories/txt/elementSymbols.txt +118 -0
- package/src/data/categories/txt/greekLetters.txt +24 -0
- package/src/data/categories/txt/months.txt +12 -0
- package/src/data/categories/txt/natoAlphabet.txt +26 -0
- package/src/data/categories/txt/numbers.txt +13 -0
- package/src/data/categories/txt/solfege.txt +8 -0
- package/src/data/categories/txt/usStateAbbreviations.txt +50 -0
- package/src/data/categories/usStateAbbreviations.ts +52 -0
- package/src/data/categories.ts +42 -0
- package/src/data/knownLogProbs.ts +2992 -0
- package/src/data/morse.ts +28 -0
- package/src/data/scrabble.ts +28 -0
- package/src/features/index.ts +120 -0
- package/src/features/letterCount.ts +174 -0
- package/src/features/letterSequence.ts +222 -0
- package/src/features/logProbCache.ts +48 -0
- package/src/features/other.ts +214 -0
- package/src/features/substring.ts +173 -0
- package/src/features/wordplay.ts +428 -0
- package/src/index.ts +3 -0
- package/src/lib/affixDistribution.ts +70 -0
- package/src/lib/counter.ts +71 -0
- package/src/lib/distribution.ts +162 -0
- package/src/lib/lengthDistribution.ts +108 -0
- package/src/lib/letterBitset.ts +123 -0
- package/src/lib/letterDistribution.ts +236 -0
- package/src/lib/letterIndices.ts +51 -0
- package/src/lib/logCounter.ts +74 -0
- package/src/lib/logNum.ts +193 -0
- package/src/lib/memoize.ts +136 -0
- package/src/lib/testUtils.ts +1 -0
- package/src/lib/util.ts +150 -0
- package/src/lib/wordlist.ts +162 -0
- package/src/linkers/index.ts +56 -0
- package/src/linkers/indexing.ts +194 -0
- package/src/linkers/length.ts +122 -0
- package/src/linkers/other.ts +117 -0
- package/src/parse.ts +20 -0
- package/src/puzlink.ts +141 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** (letter, code) pairs in alphabetical order; codes are written with "." and "-". */
const morsePairs: [letter: string, code: string][] = [
  ["a", ".-"],
  ["b", "-..."],
  ["c", "-.-."],
  ["d", "-.."],
  ["e", "."],
  ["f", "..-."],
  ["g", "--."],
  ["h", "...."],
  ["i", ".."],
  ["j", ".---"],
  ["k", "-.-"],
  ["l", ".-.."],
  ["m", "--"],
  ["n", "-."],
  ["o", "---"],
  ["p", ".--."],
  ["q", "--.-"],
  ["r", ".-."],
  ["s", "..."],
  ["t", "-"],
  ["u", "..-"],
  ["v", "...-"],
  ["w", ".--"],
  ["x", "-..-"],
  ["y", "-.--"],
  ["z", "--.."],
];

/** Morse code for each lowercase letter a-z, keyed by the letter. */
export const morseLetter: Record<string, string> =
  Object.fromEntries(morsePairs);
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Letters grouped by their tile score, in ascending score order. */
const scrabbleTiers: [score: number, letters: string][] = [
  [1, "eaionrtlsu"],
  [2, "dg"],
  [3, "bcmp"],
  [4, "fhvwy"],
  [5, "k"],
  [8, "jx"],
  [10, "qz"],
];

/** Scrabble tile score for each lowercase letter a-z, keyed by the letter. */
export const scrabbleLetterScore: Record<string, number> = Object.fromEntries(
  scrabbleTiers.flatMap(([score, letters]) =>
    Array.from(letters, (letter) => [letter, score] as const),
  ),
);
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { LetterIndices } from "../lib/letterIndices.js";
|
|
2
|
+
import { LogNum } from "../lib/logNum.js";
|
|
3
|
+
import { Wordlist } from "../lib/wordlist.js";
|
|
4
|
+
import type { Linker } from "../linkers/index.js";
|
|
5
|
+
import { letterCountFeatures } from "./letterCount.js";
|
|
6
|
+
import { letterSequenceFeatures } from "./letterSequence.js";
|
|
7
|
+
import { KnownLogProbs } from "./logProbCache.js";
|
|
8
|
+
import { otherFeatures } from "./other.js";
|
|
9
|
+
import { substringFeatures } from "./substring.js";
|
|
10
|
+
import { wordplayFeatures } from "./wordplay.js";
|
|
11
|
+
|
|
12
|
+
/** Per-slug context handed to every feature's `property` callback. */
type Props = {
  /** Letter index structure built from the slug with `LetterIndices.from`. */
  letterIndices: LetterIndices;
  /** The word list supplied by the caller. */
  wordlist: Wordlist;
};
|
|
16
|
+
|
|
17
|
+
/**
 * A Feature is a single, concrete property that a slug either has or lacks.
 *
 * Keep features narrow: prefer "can prepend T to get a word" over "can
 * prepend a letter to get a word". Broader statements are the job of linkers,
 * which combine or generalize features.
 */
export type Feature = {
  /** Feature name; it doubles as the name of the linker built from it. */
  name: string;
  /**
   * Returns `null` when `slug` lacks the feature. Otherwise returns a
   * description whose subject is `slug`, elaborating on the feature name.
   */
  property: (slug: string, props: Props) => string | null;
};
|
|
33
|
+
|
|
34
|
+
/** Build the `Props` for one slug: its letter indices plus the given word list. */
function getProps(wordlist: Wordlist, slug: string): Props {
  const letterIndices = LetterIndices.from(slug);
  return { letterIndices, wordlist };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Create a binomial linker for a given feature. A binomial link is the
 * probability that at least k or at most k out of n words share the feature,
 * whichever is less.
 *
 * The feature's base log-probability is looked up by name in `KnownLogProbs`;
 * when it is not known there, it is computed from `wordlist`.
 *
 * @param wordlist - Word list used to compute the feature's probability and
 *   passed to the feature through its props.
 * @param feature - The feature to wrap; its `name` becomes the linker name.
 * @returns The linker, or `null` when the feature's log-probability is
 *   `-Infinity`, i.e. no linker can be built for it.
 */
function featureLinker(
  wordlist: Wordlist,
  { name, property }: Feature,
): Linker | null {
  const featureLogProb = KnownLogProbs.get(name, () => {
    return wordlist.logProb(
      (word) => property(word, getProps(wordlist, word)) !== null,
    );
  });
  if (featureLogProb.toLog() === -Infinity) {
    // We can't meaningfully make linkers out of zero-probability things,
    // so skip this feature entirely.
    return null;
  }

  return {
    name,
    eval: (slugs, options) => {
      // A slug has the feature exactly when `property` returns non-null; this
      // is the same predicate used for the base probability above, so an
      // empty-string description still counts as a match.
      const description = slugs.flatMap((word) => {
        const result = property(word, getProps(wordlist, word));
        return result !== null ? [result] : [];
      });
      // Some, but too few, slugs match: report nothing. Zero matches is not
      // filtered here and still goes through the p-value below.
      if (
        description.length !== 0 &&
        description.length < options.minFeatureRatio * slugs.length
      ) {
        return [];
      }
      const logProb = LogNum.binomialPValue(
        description.length,
        slugs.length,
        featureLogProb,
      );
      return [
        {
          name: `${description.length.toString()}/${slugs.length.toString()} ${name}`,
          description,
          logProb,
        },
      ];
    },
  };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Feature-based linkers: one linker per feature, in feature order, omitting
 * every feature for which `featureLinker` returns `null`.
 */
export function featureLinkers(wordlist: Wordlist): Linker[] {
  const features = [
    ...letterCountFeatures(),
    ...letterSequenceFeatures(),
    ...otherFeatures(),
    ...substringFeatures(),
    ...wordplayFeatures(),
  ];

  const linkers: Linker[] = [];
  for (const feature of features) {
    const linker = featureLinker(wordlist, feature);
    if (linker) {
      linkers.push(linker);
    }
  }
  return linkers;
}
|
|
101
|
+
|
|
102
|
+
/**
 * For testing purposes. Takes a list of features, and returns
 * a function that takes a slug and returns all features it satisfies.
 *
 * @param features - Features to probe.
 * @param wordlist - Word list passed to each feature through its props.
 * @returns A function mapping a slug to `{ [feature name]: description }` for
 *   every feature whose `property` returns non-null.
 */
export function makeFeatureGetter(
  features: Feature[],
  wordlist: Wordlist,
): (slug: string) => Record<string, string> {
  return (slug) => {
    // The props depend only on the slug, so build them once per slug rather
    // than once per feature.
    const props = getProps(wordlist, slug);
    const properties: Record<string, string> = {};
    for (const feature of features) {
      const property = feature.property(slug, props);
      // `null` is the only "feature absent" signal; an empty description
      // still means the slug has the feature.
      if (property !== null) {
        properties[feature.name] = property;
      }
    }
    return properties;
  };
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import { Counter } from "../lib/counter.js";
|
|
2
|
+
import { CONSONANTS, LETTERS, VOWELS } from "../lib/letterDistribution.js";
|
|
3
|
+
import {
|
|
4
|
+
capitalizeAt,
|
|
5
|
+
getArithmeticSequenceInfo,
|
|
6
|
+
interval,
|
|
7
|
+
mapProduct,
|
|
8
|
+
windows,
|
|
9
|
+
} from "../lib/util.js";
|
|
10
|
+
import type { Feature } from "./index.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Feature: the slug contains `letter` exactly `times` times (`strict`) or at
 * least `times` times (non-strict). The description is whatever
 * `capitalizeAt` produces for the slug and the letter's indices.
 */
function withTimes(letter: string, times: number, strict: boolean): Feature {
  const quantity = strict
    ? times.toString()
    : `at least ${times.toString()}`;
  return {
    name: `has ${quantity} ${letter}`,
    property: (slug, { letterIndices }) => {
      const positions = letterIndices.get(letter);
      const matches = strict
        ? positions.length === times
        : positions.length >= times;
      return matches ? capitalizeAt(slug, positions) : null;
    },
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Feature: the slug contains exactly `times` distinct letters drawn from
 * `kind.letters`. The description lists those letters in sorted order.
 */
function uniqueOf(
  kind: { name: string; letters: string },
  times: number,
): Feature {
  const name = `has ${times.toString()} unique ${kind.name}`;
  return {
    name,
    property: (slug, { letterIndices }) => {
      const present = letterIndices.filterKeys((letter) =>
        kind.letters.includes(letter),
      );
      const joined = present.sort().join("");
      return joined.length === times
        ? `${slug} ${kind.name}: ${joined}`
        : null;
    },
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Feature: exactly `count` distinct n-grams of size `kind.n` occur `repeats`
 * times each (`strict`) or at least `repeats` times each (non-strict). The
 * description lists the qualifying n-grams.
 */
function nGramRepeatsTimes(
  kind: { name: string; n: number },
  count: number,
  repeats: number,
  strict: boolean,
): Feature {
  const qualifier = strict ? "" : "at least ";
  return {
    name: `has ${count.toString()} ${kind.name}, each repeating ${qualifier}${repeats.toString()} times`,
    property: (slug) => {
      const grams = Array.from(windows(slug, kind.n), (w) => w.join(""));
      const tally = Counter.from(grams);
      const qualifying = tally.filterKeys((_, occurrences) =>
        strict ? occurrences === repeats : occurrences >= repeats,
      );
      if (qualifying.length === count) {
        return `${slug}: ${qualifying.join(", ")}`;
      }
      return null;
    },
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Feature: at least one letter from `kind.letters` occurs two or more times
 * in the slug. The description lists every such letter.
 */
function repeatedOf(kind: { name: string; letters: string }): Feature {
  return {
    name: `has repeated ${kind.name}`,
    property: (slug, { letterIndices }) => {
      const doubled = letterIndices.filterKeys((letter, indices) => {
        return indices.length >= 2 && kind.letters.includes(letter);
      });
      if (doubled.length === 0) {
        return null;
      }
      return `${slug}: ${doubled.join(", ")}`;
    },
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Feature: `letterIndices.countSet()` holds a single value, i.e. every letter
 * of the slug occurs the same number of times.
 */
function equalCounts(): Feature {
  return {
    name: "has equal letter counts",
    property: (slug, { letterIndices }) => {
      const countSet = letterIndices.countSet();
      if (countSet.size !== 1) {
        return null;
      }
      const [shared] = Array.from(countSet);
      return `${slug} letter counts are all ${shared!.toString()}`;
    },
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Feature: `letterIndices.countSet()` holds exactly two values. The
 * description names both counts and, for each, the sorted letters that occur
 * that many times.
 */
function twoCounts(): Feature {
  return {
    name: "has one of two letter counts",
    property: (slug, { letterIndices }) => {
      const countSet = letterIndices.countSet();
      if (countSet.size !== 2) {
        return null;
      }
      // Sorted, concatenated letters that occur exactly `n` times.
      const lettersWith = (n: number) =>
        letterIndices
          .filterKeys((_, indices) => indices.length === n)
          .sort()
          .join("");
      const [first, second] = Array.from(countSet) as [number, number];
      const firstLetters = lettersWith(first);
      const secondLetters = lettersWith(second);
      return `${slug} letter counts are ${first.toString()} (${firstLetters}) or ${second.toString()} (${secondLetters})`;
    },
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Feature: the slug's letter counts, sorted ascending, are accepted by
 * `getArithmeticSequenceInfo`. The description lists the letters and their
 * counts in that sorted order.
 */
function arithmeticSequenceCounts(): Feature {
  return {
    name: "has letter counts in arithmetic sequence",
    property: (slug, { letterIndices }) => {
      const ascending = Array.from(letterIndices.counts()).sort(
        ([, lo], [, hi]) => lo - hi,
      );
      const counts = ascending.map(([, n]) => n);
      if (!getArithmeticSequenceInfo(counts)) {
        return null;
      }
      const letters = ascending.map(([letter]) => letter);
      return `${slug} letter counts of ${letters.join(", ")} are ${counts.join(", ")}`;
    },
  };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Features for letter counts: everything that can be said from the histogram
 * of letters/bigrams/trigrams in the slug alone, without looking at order.
 */
export function letterCountFeatures(): Feature[] {
  const vowels = { name: "vowels", letters: VOWELS };
  const consonants = { name: "consonants", letters: CONSONANTS };
  const letters = { name: "letters", letters: LETTERS };

  return [
    // Occurrence counts of each individual letter.
    ...mapProduct(withTimes, LETTERS, interval(1, 5), [true, false]),
    // Number of distinct vowels / consonants / letters.
    ...mapProduct(uniqueOf, [vowels], interval(0, 5)),
    ...mapProduct(uniqueOf, [consonants], interval(1, 15)),
    ...mapProduct(uniqueOf, [letters], interval(1, 26)),
    // How many n-grams repeat, and how often.
    ...mapProduct(
      nGramRepeatsTimes,
      [
        { name: "letters", n: 1 },
        { name: "bigrams", n: 2 },
        { name: "trigrams", n: 3 },
      ],
      interval(1, 5),
      interval(2, 4),
      [true, false],
    ),
    ...mapProduct(repeatedOf, [vowels, consonants]),
    // Shape of the whole histogram.
    equalCounts(),
    twoCounts(),
    arithmeticSequenceCounts(),
  ];
}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import { CONSONANTS, LETTERS, VOWELS } from "../lib/letterDistribution.js";
|
|
2
|
+
import {
|
|
3
|
+
enumerate,
|
|
4
|
+
interval,
|
|
5
|
+
mapProduct,
|
|
6
|
+
printIndexSlug,
|
|
7
|
+
} from "../lib/util.js";
|
|
8
|
+
import type { Feature } from "./index.js";
|
|
9
|
+
|
|
10
|
+
// TODO: make the printed property names better here; some of these should be
|
|
11
|
+
// more than just the raw index. evaluate after looking at integration tests
|
|
12
|
+
|
|
13
|
+
/**
 * Feature: there are exactly `times` positions where `letter` occurs and
 * occurs again with `distance` letters in between. The description is
 * `printIndexSlug` applied to both indices of every such pair.
 */
function equalWithDistanceTimes(
  letter: string,
  distance: number,
  times: number,
): Feature {
  // Offset from the first letter of a pair to the second.
  const step = distance + 1;
  return {
    name: `has ${letter} with ${distance.toString()} letters between, ${times.toString()} times`,
    property: (slug) => {
      const pairStarts = interval(0, slug.length - step).filter(
        (i) => slug[i] === letter && slug[i + step] === letter,
      );
      return pairStarts.length === times
        ? printIndexSlug(
            slug,
            pairStarts.flatMap((i) => [i, i + step]),
          )
        : null;
    },
  };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Feature: `a` occurs somewhere in the slug followed, `distance` letters
 * later, by a different letter `b`. Never matches when `a` and `b` are the
 * same letter. The description is `printIndexSlug` applied to both indices of
 * every such pair.
 */
function unequalWithDistance(a: string, b: string, distance: number): Feature {
  // Offset from the `a` of a pair to its `b`.
  const step = distance + 1;
  return {
    name: `has ${a} and ${b} with ${distance.toString()} letters between`,
    property: (slug) => {
      if (a === b) {
        return null;
      }
      const pairStarts = interval(0, slug.length - step).filter(
        (i) => slug[i] === a && slug[i + step] === b,
      );
      return pairStarts.length > 0
        ? printIndexSlug(
            slug,
            pairStarts.flatMap((i) => [i, i + step]),
          )
        : null;
    },
  };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Feature: the number of positions whose letter is repeated with `distance`
 * letters in between is exactly `times` (`strict`) or at least `times`
 * (non-strict), for any letter. The description is `printIndexSlug` applied
 * to both indices of every such pair.
 */
function equalAnyDistanceTimes(
  distance: number,
  times: number,
  strict: boolean,
): Feature {
  // Offset from the first letter of a pair to the second.
  const step = distance + 1;
  const qualifier = strict ? "" : "at least ";
  return {
    name: `has equal letters with ${distance.toString()} letters between, ${qualifier}${times.toString()} times`,
    property: (slug) => {
      const pairStarts = interval(0, slug.length - step).filter(
        (i) => slug[i] === slug[i + step],
      );
      const matches = strict
        ? pairStarts.length === times
        : pairStarts.length >= times;
      if (!matches) {
        return null;
      }
      return printIndexSlug(
        slug,
        pairStarts.flatMap((i) => [i, i + step]),
      );
    },
  };
}
|
|
79
|
+
|
|
80
|
+
/**
 * Feature: the number of adjacent letter pairs accepted by `bigram.check` is
 * exactly `times` (`strict`) or at least `times` (non-strict). The
 * description is `printIndexSlug` applied to both indices of every accepted
 * pair.
 */
function bigramOfTimes(
  bigram: { type: string; check: (a: string, b: string) => boolean },
  times: number,
  strict: boolean,
): Feature {
  const quantity = strict
    ? times.toString()
    : `at least ${times.toString()}`;
  return {
    name: `has ${quantity} ${bigram.type} bigrams`,
    property: (slug) => {
      const pairStarts = interval(0, slug.length - 2).filter((i) =>
        bigram.check(slug[i]!, slug[i + 1]!),
      );
      const matches = strict
        ? pairStarts.length === times
        : pairStarts.length >= times;
      return matches
        ? printIndexSlug(
            slug,
            pairStarts.flatMap((i) => [i, i + 1]),
          )
        : null;
    },
  };
}
|
|
103
|
+
|
|
104
|
+
/** A named test on two adjacent letters `(a, b)`, in slug order. */
type BigramKind = { type: string; check: (a: string, b: string) => boolean };

/**
 * The bigram kinds used by the bigram features:
 * - `alpha` / `revAlpha`: `b` compares greater / smaller than `a`.
 * - `seq` / `revSeq`: the char code of `b` is exactly one more / one less
 *   than that of `a`.
 */
const bigram = {
  alpha: { type: "alphabetical", check: (a, b) => b > a },
  revAlpha: { type: "reverse alphabetical", check: (a, b) => b < a },
  seq: {
    type: "sequential",
    check: (a, b) => b.charCodeAt(0) - a.charCodeAt(0) === 1,
  },
  revSeq: {
    type: "reverse sequential",
    check: (a, b) => a.charCodeAt(0) - b.charCodeAt(0) === 1,
  },
} as const satisfies Record<string, BigramKind>;
|
|
125
|
+
|
|
126
|
+
/**
 * Feature: the longest run of consecutive letters from `kind.letters` has
 * length exactly `times` (`strict`) or at least `times` (non-strict). When
 * several runs tie for longest, the first one is reported. The description is
 * `printIndexSlug` applied to the indices of that run.
 */
function consecutiveOfTimes(
  kind: { name: string; letters: string },
  times: number,
  strict: boolean,
): Feature {
  const quantity = strict
    ? times.toString()
    : `at least ${times.toString()}`;
  return {
    name: `has ${quantity} ${kind.name} in a row`,
    property: (slug) => {
      let longest = 0;
      let longestStart = -1;
      let run = 0;
      for (const [index, letter] of enumerate(slug)) {
        run = kind.letters.includes(letter) ? run + 1 : 0;
        // Strict `>` keeps the earliest run among equally long ones.
        if (run > longest) {
          longest = run;
          longestStart = index - run + 1;
        }
      }
      const matches = strict ? longest === times : longest >= times;
      if (!matches) {
        return null;
      }
      return printIndexSlug(
        slug,
        interval(longestStart, longestStart + longest - 1),
      );
    },
  };
}
|
|
160
|
+
|
|
161
|
+
// TODO: does this belong somewhere else?
|
|
162
|
+
/**
 * Feature: the first `length` letters of the slug equal its last `length`
 * letters, and the two ends do not overlap (the slug has at least
 * `2 * length` letters). The description is `printIndexSlug` applied to the
 * indices of both ends.
 */
function bookendsOf(length: number): Feature {
  return {
    name: `starts and ends with the same ${length.toString()} letters`,
    property: (slug) => {
      const endsOverlap = slug.length < length * 2;
      if (endsOverlap || slug.slice(0, length) !== slug.slice(-length)) {
        return null;
      }
      const head = interval(0, length - 1);
      const tail = interval(slug.length - length, slug.length - 1);
      return printIndexSlug(slug, [...head, ...tail]);
    },
  };
}
|
|
179
|
+
|
|
180
|
+
/**
 * Features for letter sequences: everything that depends on the relative
 * order of the letters/bigrams/trigrams within the slug.
 */
export function letterSequenceFeatures(): Feature[] {
  const { alpha, revAlpha, seq, revSeq } = bigram;
  const vowels = { name: "vowels", letters: VOWELS };
  const consonants = { name: "consonants", letters: CONSONANTS };

  return [
    // A specific letter repeated at a fixed distance.
    ...mapProduct(equalWithDistanceTimes, LETTERS, [0, 1, 2, 3], [1]),
    ...mapProduct(equalWithDistanceTimes, LETTERS, [0, 1], [2, 3]),
    // Two specific, different letters at a fixed distance.
    ...mapProduct(unequalWithDistance, LETTERS, LETTERS, [0, 1]),
    // Any letter repeated at a fixed distance.
    ...mapProduct(
      equalAnyDistanceTimes,
      [0, 1, 2, 3],
      [1, 2, 3],
      [true, false],
    ),
    // Counts of ordered / sequential adjacent pairs.
    ...mapProduct(bigramOfTimes, [alpha, revAlpha], interval(1, 10), [
      true,
      false,
    ]),
    ...mapProduct(bigramOfTimes, [seq, revSeq], interval(1, 5), [true, false]),
    ...mapProduct(bigramOfTimes, [alpha, revAlpha, seq, revSeq], [0], [true]),
    // Runs of vowels / consonants.
    ...mapProduct(consecutiveOfTimes, [vowels, consonants], interval(2, 5), [
      true,
      false,
    ]),
    ...mapProduct(bookendsOf, [1, 2, 3]),
  ];
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { knownLogProbs } from "../data/knownLogProbs.js";
|
|
2
|
+
import { LogNum } from "../lib/logNum.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Cache of log-probabilities keyed by name. It is seeded from a plain
 * `name -> number` table and filled in lazily by `get`.
 */
class LogProbCache {
  /** When true, `get` returns an already-known value instead of recomputing. */
  useCache: boolean;
  /** Known log-probabilities, keyed by name. */
  knownLogProbs: Record<string, LogNum>;
  /**
   * Hook wrapped around every computation done by `get`. It receives the
   * name, the compute function and the previously known value (if any), and
   * returns the value to store. The default just calls `fn`.
   */
  wrapCompute: (
    name: string,
    fn: () => LogNum,
    existing: LogNum | undefined,
  ) => LogNum;

  /**
   * @param data - Seed table; each value is converted with `LogNum.fromExp`.
   */
  constructor(data: Record<string, number>) {
    this.useCache = true;
    this.knownLogProbs = {};
    for (const [name, logProb] of Object.entries(data)) {
      this.knownLogProbs[name] = LogNum.fromExp(logProb);
    }
    this.wrapCompute = (_, fn) => fn();
  }

  /**
   * Yields, line by line, TypeScript source declaring a `knownLogProbs`
   * table with every currently known entry (value taken from `toLog()`).
   */
  *dump(): Generator<string> {
    yield `export const knownLogProbs: Record<string, number> = {`;
    for (const [name, logProb] of Object.entries(this.knownLogProbs)) {
      // JSON.stringify quotes the key and escapes quotes/backslashes, so the
      // emitted line stays valid source whatever the name contains.
      yield ` ${JSON.stringify(name)}: ${logProb.toLog().toString()},`;
    }
    yield `};`;
  }

  /**
   * Look up the log-probability stored under `name`, computing and storing
   * it when it is missing or when `useCache` is off.
   *
   * @param name - Cache key.
   * @param defaultLogProb - Computes the value when it has to be (re)built.
   * @returns The known or freshly computed value.
   */
  get(name: string, defaultLogProb: () => LogNum): LogNum {
    // Only own properties count as cached: a bare `in` / index lookup would
    // also match names inherited from Object.prototype such as "constructor"
    // or "toString" and hand back a function instead of a LogNum.
    const existing = Object.prototype.hasOwnProperty.call(
      this.knownLogProbs,
      name,
    )
      ? this.knownLogProbs[name]
      : undefined;
    if (this.useCache && existing !== undefined) {
      return existing;
    }
    const logProb = this.wrapCompute(name, defaultLogProb, existing);
    this.knownLogProbs[name] = logProb;
    return logProb;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Shared cache instance, seeded with the bundled `knownLogProbs` table. The
 * cache is a class rather than a bare record so that it can carry extra
 * behaviour, such as dumping its contents or hooking computations for debug
 * output.
 */
export const KnownLogProbs = new LogProbCache(knownLogProbs);
|