@scrabble-solver/word-definitions 2.9.1 → 2.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/LICENSE +1 -1
  2. package/build/getWordDefinition.d.ts +1 -1
  3. package/build/getWordDefinition.js +3 -3
  4. package/build/lib/normalizeDefinition.js +3 -1
  5. package/build/parse/parse.js +2 -2
  6. package/build/parse/parseEnglish.js +7 -7
  7. package/build/parse/parseFrench.js +3 -6
  8. package/build/parse/parseGerman.js +54 -16
  9. package/build/parse/parsePolish.js +3 -6
  10. package/build/parse/parseSpanish.js +3 -6
  11. package/build/types.d.ts +1 -1
  12. package/package.json +5 -5
  13. package/src/getWordDefinition.ts +3 -3
  14. package/src/lib/normalizeDefinition.ts +4 -1
  15. package/src/parse/__tests__/expected/de-DE.hm.json +4 -0
  16. package/src/parse/__tests__/expected/de-DE.ho.json +9 -0
  17. package/src/parse/__tests__/expected/de-DE.kolla.json +4 -0
  18. package/src/parse/__tests__/expected/de-DE.vom.json +16 -0
  19. package/src/parse/__tests__/expected/en-US.awe.json +1 -1
  20. package/src/parse/__tests__/expected/en-US.pawn.json +1 -1
  21. package/src/parse/__tests__/expected/en-US.pawnee.json +1 -1
  22. package/src/parse/__tests__/expected/en-US.pean.json +1 -1
  23. package/src/parse/__tests__/expected/en-US.wiz.json +1 -1
  24. package/src/parse/__tests__/expected/es-ES.corma.json +1 -1
  25. package/src/parse/__tests__/expected/es-ES.portero.json +1 -1
  26. package/src/parse/__tests__/input/de-DE.hm.html +873 -0
  27. package/src/parse/__tests__/input/de-DE.ho.html +1144 -0
  28. package/src/parse/__tests__/input/de-DE.kolla.html +980 -0
  29. package/src/parse/__tests__/input/de-DE.vom.html +738 -0
  30. package/src/parse/parse.test.ts +4 -0
  31. package/src/parse/parse.ts +2 -2
  32. package/src/parse/parseEnglish.ts +8 -4
  33. package/src/parse/parseFrench.ts +3 -3
  34. package/src/parse/parseGerman.ts +71 -12
  35. package/src/parse/parsePolish.ts +3 -3
  36. package/src/parse/parseSpanish.ts +3 -3
  37. package/src/types.ts +1 -1
package/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2021 Kamil Mielnik <kamil.adam.mielnik@gmail.com>
1
+ Copyright (c) 2022 Kamil Mielnik <kamil@kamilmielnik.com>
2
2
 
3
3
  Attribution-NonCommercial-NoDerivatives 4.0 International
4
4
 
package/build/getWordDefinition.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  import { Locale, WordDefinition } from '@scrabble-solver/types';
2
- declare const getWordDefinition: (locale: Locale, word: string) => Promise<WordDefinition>;
2
+ declare const getWordDefinition: (locale: Locale, word: string, isAllowed: boolean) => Promise<WordDefinition>;
3
3
  export default getWordDefinition;
package/build/getWordDefinition.js CHANGED
@@ -3,10 +3,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const types_1 = require("@scrabble-solver/types");
4
4
  const crawl_1 = require("./crawl");
5
5
  const parse_1 = require("./parse");
6
- const getWordDefinition = async (locale, word) => {
6
+ const getWordDefinition = async (locale, word, isAllowed) => {
7
7
  const html = await (0, crawl_1.crawl)(locale, word);
8
- const { definitions, isAllowed } = (0, parse_1.parse)(locale, html);
9
- const wordDefinition = new types_1.WordDefinition({ definitions, isAllowed, word });
8
+ const { definitions, exists } = (0, parse_1.parse)(locale, html);
9
+ const wordDefinition = new types_1.WordDefinition({ definitions, exists, isAllowed, word });
10
10
  return wordDefinition;
11
11
  };
12
12
  exports.default = getWordDefinition;
package/build/lib/normalizeDefinition.js CHANGED
@@ -6,7 +6,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const striptags_1 = __importDefault(require("striptags"));
7
7
  const EMPHASIS_TAGS = ['a', 'b', 'em', 'internalXref'];
8
8
  const normalizeHtmlTags = (definition) => (0, striptags_1.default)((0, striptags_1.default)(definition, EMPHASIS_TAGS), undefined, '"');
9
- const normalizeLineBreaks = (definition) => definition.replace(/[\r\n]/g, '');
9
+ const normalizeLineBreaks = (definition) => definition.replace(/\r\n/g, '\n');
10
+ const normalizeWhitespace = (definition) => definition.replace(/[ ]+/g, ' ');
10
11
  const normalizeQuotes = (definition) => definition.replace(/\."/g, '".');
11
12
  /**
12
13
  * `(1.2) definition` -> `definition`
@@ -26,6 +27,7 @@ const normalizers = [
26
27
  normalizeMarkers,
27
28
  normalizeQuotes,
28
29
  normalizeLineBreaks,
30
+ normalizeWhitespace,
29
31
  normalizeTrailingSymbols,
30
32
  normalizeLeadingSymbols,
31
33
  normalizeNonWords,
package/build/parse/parse.js CHANGED
@@ -19,10 +19,10 @@ const parsePerLocale = {
19
19
  [types_1.Locale.PL_PL]: parsePolish_1.default,
20
20
  };
21
21
  const parse = (locale, html) => {
22
- const { definitions, isAllowed } = parsePerLocale[locale](html);
22
+ const { definitions, exists } = parsePerLocale[locale](html);
23
23
  return {
24
24
  definitions: (0, lib_1.unique)(definitions.map(lib_1.normalizeDefinition).filter(Boolean)),
25
- isAllowed,
25
+ exists,
26
26
  };
27
27
  };
28
28
  exports.default = parse;
package/build/parse/parseEnglish.js CHANGED
@@ -1,18 +1,18 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const cheerio_1 = __importDefault(require("cheerio"));
3
+ const cheerio_1 = require("cheerio");
4
+ const DOES_NOT_EXIST_MESSAGE =
5
+ // eslint-disable-next-line max-len
6
+ "The word you've entered isn't in the dictionary. Click on a spelling suggestion below or try again using the search bar above.";
7
7
  const parseEnglish = (html) => {
8
- const $ = cheerio_1.default.load(html);
8
+ const $ = (0, cheerio_1.load)(html);
9
9
  $('strong.mw_t_bc').replaceWith(', ');
10
10
  $('.text-lowercase').remove();
11
11
  $('[id^=dictionary-entry]').find('.dtText > *:not(a)').remove();
12
12
  const $definitions = $('[id^=dictionary-entry]').find('.dtText, .cxl-ref');
13
13
  return {
14
- definitions: Array.from($definitions).map((definition) => $(definition).text()),
15
- isAllowed: $definitions.length > 0,
14
+ definitions: Array.from($definitions).map((definition) => $(definition).text().replace(/\n/g, '')),
15
+ exists: $('.spelling-suggestion-text').text().trim() !== DOES_NOT_EXIST_MESSAGE,
16
16
  };
17
17
  };
18
18
  exports.default = parseEnglish;
package/build/parse/parseFrench.js CHANGED
@@ -1,15 +1,12 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const cheerio_1 = __importDefault(require("cheerio"));
3
+ const cheerio_1 = require("cheerio");
7
4
  const parseFrench = (html) => {
8
- const $ = cheerio_1.default.load(html);
5
+ const $ = (0, cheerio_1.load)(html);
9
6
  const $definitions = $('.tlf_cdefinition');
10
7
  return {
11
8
  definitions: Array.from($definitions).map((definition) => $(definition).text()),
12
- isAllowed: $('#vitemselected span').length > 0,
9
+ exists: $('#vitemselected span').length > 0,
13
10
  };
14
11
  };
15
12
  exports.default = parseFrench;
package/build/parse/parseGerman.js CHANGED
@@ -1,22 +1,60 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const cheerio_1 = __importDefault(require("cheerio"));
3
+ const cheerio_1 = require("cheerio");
7
4
  const parseGerman = (html) => {
8
- const $ = cheerio_1.default.load(html);
9
- const $meaningOverview = $('.bedeutungsuebersicht');
10
- let $definitions;
11
- if ($meaningOverview.length > 0) {
12
- $definitions = $('.bedeutungsuebersicht > ol > li > a');
13
- }
14
- else {
15
- $definitions = $('.dwdswb-lesart .dwdswb-definition');
5
+ const $ = (0, cheerio_1.load)(html);
6
+ const definitions = [parseBedeutungsubersicht, parseBedeutungen, parseBedeutung].reduce((results, parse) => (results.length === 0 ? parse($) : results), []);
7
+ const exists = Array.from($('.label-danger')).every((label) => $(label).text() !== 'Hinweis');
8
+ return { definitions, exists };
9
+ };
10
+ const parseBedeutungsubersicht = ($) => {
11
+ Array.from($('.bedeutungsuebersicht ol > li > a')).forEach((item) => {
12
+ $(item).text($(item).text().replace(/\n/g, ''));
13
+ });
14
+ Array.from($('.bedeutungsuebersicht ol > li > ol > li')).forEach((item) => {
15
+ const text = `\n${$(item).text().replace(/\n/g, '')}`;
16
+ const $text = $(`<div>${text}</div>`);
17
+ $(item).replaceWith($text);
18
+ });
19
+ Array.from($('.bedeutungsuebersicht ol > li > ol')).forEach((list) => {
20
+ const $list = $(list);
21
+ const html = $list.html() || '';
22
+ const $html = $(`<div>${html}</div>`);
23
+ const $prev = $list.prev('a');
24
+ if ($prev) {
25
+ $prev.append($html);
26
+ $(list).remove();
27
+ }
28
+ else {
29
+ $(list).replaceWith($html);
30
+ }
31
+ });
32
+ return parseDefinitions($, $('.bedeutungsuebersicht ol > li'));
33
+ };
34
+ const parseBedeutung = ($) => {
35
+ return parseDefinitions($, $('.dwdswb-lesart .dwdswb-definition-spezifizierung'));
36
+ };
37
+ const parseBedeutungen = ($) => {
38
+ const definitions = parseDefinitions($, $('.dwdswb-lesart .dwdswb-definition'));
39
+ if (definitions.length > 0) {
40
+ return definitions;
16
41
  }
17
- return {
18
- definitions: Array.from($definitions).map((definition) => $(definition).text().replace('/\n/', '')),
19
- isAllowed: $definitions.length > 0,
20
- };
42
+ const $references = $('.dwdswb-lesart .dwdswb-verweis');
43
+ const references = Array.from($references).reduce((result, reference) => {
44
+ const html = reference.attribs['data-content'] || '<span />';
45
+ const values = $(html)
46
+ .text()
47
+ .split(';')
48
+ .map((value) => value.trim());
49
+ return result.concat(values);
50
+ }, []);
51
+ return references;
52
+ };
53
+ const parseDefinitions = ($, $definitions) => {
54
+ return Array.from($definitions).map((definition) => $(definition)
55
+ .text()
56
+ .replace(/[ ]+/g, ' ')
57
+ .replace(/[ ]\n/g, '\n')
58
+ .replace(/^[0-9]+\.\s/g, ''));
21
59
  };
22
60
  exports.default = parseGerman;
package/build/parse/parsePolish.js CHANGED
@@ -1,17 +1,14 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const cheerio_1 = __importDefault(require("cheerio"));
3
+ const cheerio_1 = require("cheerio");
7
4
  const parsePolish = (html) => {
8
- const $ = cheerio_1.default.load(html);
5
+ const $ = (0, cheerio_1.load)(html);
9
6
  const $header = $($('h1')[0]);
10
7
  const $isAllowed = $header.next();
11
8
  const $definitions = $header.next().next().next().next();
12
9
  return {
13
10
  definitions: $definitions.text().trim().split(/\d+\./),
14
- isAllowed: $isAllowed.text().trim().indexOf('dopuszczalne w grach') >= 0,
11
+ exists: $isAllowed.text().trim().indexOf('dopuszczalne w grach') >= 0,
15
12
  };
16
13
  };
17
14
  exports.default = parsePolish;
package/build/parse/parseSpanish.js CHANGED
@@ -1,11 +1,8 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- const cheerio_1 = __importDefault(require("cheerio"));
3
+ const cheerio_1 = require("cheerio");
7
4
  const parseSpanish = (html) => {
8
- const $ = cheerio_1.default.load(html);
5
+ const $ = (0, cheerio_1.load)(html);
9
6
  $('.verdBold14 + .gris11 + .gris13').remove();
10
7
  $('br + .gris13').remove();
11
8
  $('.grisItalic13 + .gris13').remove();
@@ -26,7 +23,7 @@ const parseSpanish = (html) => {
26
23
  .filter(Boolean)
27
24
  .map((definition) => definition.replace(/\s+\.$/g, ''))
28
25
  .map((definition) => (definition.endsWith('.') ? definition : `${definition}.`)),
29
- isAllowed: definitions.length > 0,
26
+ exists: $('.wrapper > p > strong').text() !== 'No se ha encontrado la palabra exacta',
30
27
  };
31
28
  };
32
29
  exports.default = parseSpanish;
package/build/types.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  export interface ParseResult {
2
2
  definitions: string[];
3
- isAllowed: boolean;
3
+ exists: boolean;
4
4
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@scrabble-solver/word-definitions",
3
- "version": "2.9.1",
3
+ "version": "2.9.3",
4
4
  "description": "Scrabble Solver 2 - Word definitions",
5
5
  "main": "build/index.js",
6
6
  "types": "build/index.d.ts",
@@ -10,8 +10,8 @@
10
10
  },
11
11
  "author": {
12
12
  "name": "Kamil Mielnik",
13
- "email": "kamil.adam.mielnik@gmail.com",
14
- "url": "https://kamilmielnik.com/"
13
+ "email": "kamil@kamilmielnik.com",
14
+ "url": "https://kamilmielnik.com"
15
15
  },
16
16
  "license": "CC-BY-NC-ND-4.0",
17
17
  "bugs": {
@@ -23,7 +23,7 @@
23
23
  "clean": "rimraf build/ node_modules/ package-lock.json"
24
24
  },
25
25
  "dependencies": {
26
- "@scrabble-solver/types": "^2.9.1",
26
+ "@scrabble-solver/types": "^2.9.3",
27
27
  "cheerio": "^1.0.0-rc.12",
28
28
  "follow-redirects": "^1.15.2",
29
29
  "striptags": "^3.2.0"
@@ -31,5 +31,5 @@
31
31
  "devDependencies": {
32
32
  "@types/follow-redirects": "^1.14.1"
33
33
  },
34
- "gitHead": "4849b4d123131fe043174f000c523868f3bd68c5"
34
+ "gitHead": "ef8d030cbb76256a377f747f7ebe4b85b7eba61e"
35
35
  }
package/src/getWordDefinition.ts CHANGED
@@ -3,10 +3,10 @@ import { Locale, WordDefinition } from '@scrabble-solver/types';
3
3
  import { crawl } from './crawl';
4
4
  import { parse } from './parse';
5
5
 
6
- const getWordDefinition = async (locale: Locale, word: string): Promise<WordDefinition> => {
6
+ const getWordDefinition = async (locale: Locale, word: string, isAllowed: boolean): Promise<WordDefinition> => {
7
7
  const html = await crawl(locale, word);
8
- const { definitions, isAllowed } = parse(locale, html);
9
- const wordDefinition = new WordDefinition({ definitions, isAllowed, word });
8
+ const { definitions, exists } = parse(locale, html);
9
+ const wordDefinition = new WordDefinition({ definitions, exists, isAllowed, word });
10
10
  return wordDefinition;
11
11
  };
12
12
 
package/src/lib/normalizeDefinition.ts CHANGED
@@ -6,7 +6,9 @@ const EMPHASIS_TAGS = ['a', 'b', 'em', 'internalXref'];
6
6
 
7
7
  const normalizeHtmlTags: Normalize = (definition) => striptags(striptags(definition, EMPHASIS_TAGS), undefined, '"');
8
8
 
9
- const normalizeLineBreaks: Normalize = (definition) => definition.replace(/[\r\n]/g, '');
9
+ const normalizeLineBreaks: Normalize = (definition) => definition.replace(/\r\n/g, '\n');
10
+
11
+ const normalizeWhitespace: Normalize = (definition) => definition.replace(/[ ]+/g, ' ');
10
12
 
11
13
  const normalizeQuotes: Normalize = (definition) => definition.replace(/\."/g, '".');
12
14
 
@@ -33,6 +35,7 @@ const normalizers: Normalize[] = [
33
35
  normalizeMarkers,
34
36
  normalizeQuotes,
35
37
  normalizeLineBreaks,
38
+ normalizeWhitespace,
36
39
  normalizeTrailingSymbols,
37
40
  normalizeLeadingSymbols,
38
41
  normalizeNonWords,
package/src/parse/__tests__/expected/de-DE.hm.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "definitions": ["[umgangssprachlich] ...\n1. drückt Einverständnis, Zustimmung aus\n2. drückt Nachdenklichkeit aus"],
3
+ "exists": true
4
+ }
package/src/parse/__tests__/expected/de-DE.ho.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "definitions": [
3
+ "⟨ho, ho!⟩, ⟨ho, ho, ho!⟩, ⟨ho, ho, ho, ho!⟩ imitiert ein Lachen mit tiefer, maskuliner Stimme oder auch ein spöttisches, höhnisches oder triumphierendes Lachen",
4
+ "⟨ho!⟩, ⟨ho, ho!⟩\na) drückt Protest, Zweifel oder Abwehr aus\nb) drückt Überraschung, Begeisterung oder Freude aus",
5
+ "[umgangssprachlich] Zuruf, um jmdn. auf sich aufmerksam zu machen; auch zur Begrüßung verwendet",
6
+ "Zuruf vor allem an Reittiere und Zugtiere\na) verwendet, um das Tier zu beruhigen, zu loben und aufzufordern, langsamer zu werden, stehen zu bleiben\nb) Zuruf, der das Tier antreiben soll"
7
+ ],
8
+ "exists": true
9
+ }
package/src/parse/__tests__/expected/de-DE.kolla.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "definitions": ["drückt Überraschung aus"],
3
+ "exists": true
4
+ }
package/src/parse/__tests__/expected/de-DE.vom.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "definitions": [
3
+ "gibt an, dass ein Ort, eine Person Ausgangspunkt einer Bewegung oder eines Vorgangs ist, der einer Bewegung ähnelt",
4
+ "drückt den Vorgang des Entfernens, den Zustand des Getrenntseins von etw., jmdm. aus",
5
+ "gibt an, dass ein Ort, eine Person Anfangspunkt einer räumlichen Erstreckung, Bezugspunkt für die Kennzeichnung einer Entfernung oder eines Lageverhältnisses ist",
6
+ "gibt die Herkunft, den Ursprung an",
7
+ "gibt das Mittel an",
8
+ "mit",
9
+ "gibt den Bereich an, für den eine Aussage gilt",
10
+ "was … betrifft, hinsichtlich",
11
+ "gibt bei Aussagen über bestimmte Eigenschaften, Verhaltensweisen die Person an, für die die Aussage gilt",
12
+ "gibt den Gegenstand, Inhalt an",
13
+ "über"
14
+ ],
15
+ "exists": true
16
+ }
package/src/parse/__tests__/expected/en-US.awe.json CHANGED
@@ -5,5 +5,5 @@
5
5
  "the power to inspire dread",
6
6
  "to inspire or fill with awe"
7
7
  ],
8
- "isAllowed": true
8
+ "exists": true
9
9
  }
package/src/parse/__tests__/expected/en-US.pawn.json CHANGED
@@ -9,5 +9,5 @@
9
9
  "the act of pawning",
10
10
  "to deposit in pledge or as security especially in exchange for money"
11
11
  ],
12
- "isAllowed": true
12
+ "exists": true
13
13
  }
package/src/parse/__tests__/expected/en-US.pawnee.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
2
  "definitions": ["a member of an American Indian people originally of Kansas and Nebraska"],
3
- "isAllowed": true
3
+ "exists": true
4
4
  }
package/src/parse/__tests__/expected/en-US.pean.json CHANGED
@@ -5,5 +5,5 @@
5
5
  "of the heraldic fur pean",
6
6
  "variant spelling of peen"
7
7
  ],
8
- "isAllowed": true
8
+ "exists": true
9
9
  }
package/src/parse/__tests__/expected/en-US.wiz.json CHANGED
@@ -1,4 +1,4 @@
1
1
  {
2
2
  "definitions": ["a person who is very good at something, wizard"],
3
- "isAllowed": true
3
+ "exists": true
4
4
  }
package/src/parse/__tests__/expected/es-ES.corma.json CHANGED
@@ -3,5 +3,5 @@
3
3
  "Cepo de madera que se fijaba al pie de un hombre o un animal para impedirle andar.",
4
4
  "Molestia o impedimento para obrar con libertad."
5
5
  ],
6
- "isAllowed": true
6
+ "exists": true
7
7
  }
package/src/parse/__tests__/expected/es-ES.portero.json CHANGED
@@ -5,5 +5,5 @@
5
5
  "Funcionario subalterno que se encarga de la vigilancia, limpieza o servicios auxiliares en oficinas públicas.",
6
6
  "Jugador que en algunos deportes defiende la portería."
7
7
  ],
8
- "isAllowed": true
8
+ "exists": true
9
9
  }