datamuse 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamuse
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: datamuse is a simple wrapper around the datamuse api https://www.datamuse.com/api/
5
5
  Author-email: Bivas Kumar <thetrotfreak@yahoo.com>
6
6
  License-Expression: MIT
@@ -0,0 +1,101 @@
1
+ from enum import StrEnum
2
+ from types import MappingProxyType
3
+ from typing import Literal, LiteralString, NotRequired, TypeAlias, TypedDict
4
+
5
+ Word: TypeAlias = LiteralString
6
+
7
+
8
+ RelatedWordCode = Literal[
9
+ "nouns_adjective",
10
+ "adjectives_noun",
11
+ "synonyms",
12
+ "triggers",
13
+ "antonyms",
14
+ "hypernyms",
15
+ "hyponyms",
16
+ "holonyms",
17
+ "meronyms",
18
+ "frequent_followers",
19
+ "frequent_predecessors",
20
+ "homophones",
21
+ "consonant",
22
+ ]
23
+
24
+
25
+ _lookup_related_code = MappingProxyType(
26
+ {
27
+ "nouns_adjective": "jja",
28
+ "adjectives_noun": "jjb",
29
+ "synonyms": "syn",
30
+ "triggers": "trg",
31
+ "antonyms": "ant",
32
+ "hypernyms": "spc",
33
+ "hyponyms": "gen",
34
+ "holonyms": "com",
35
+ "meronyms": "par",
36
+ "frequent_followers": "bga",
37
+ "frequent_predecessors": "bgb",
38
+ "homophones": "hom",
39
+ "consonant": "cns",
40
+ }
41
+ )
42
+
43
+
44
+ MetadataFlag = Literal[
45
+ "definitions",
46
+ "parts_of_speech",
47
+ "syllable_count",
48
+ ]
49
+
50
+
51
+ _lookup_metadata_flag = MappingProxyType(
52
+ {
53
+ "definitions": "d",
54
+ "parts_of_speech": "p",
55
+ "syllable_count": "s",
56
+ }
57
+ )
58
+
59
+
60
+ class WordRelation(StrEnum):
61
+ """
62
+ When paired with a `Word`, the `Word` will be in a predefined lexical relation.
63
+ """
64
+
65
+ nouns_adjective = "jja"
66
+ adjectives_noun = "jjb"
67
+ synonyms = "syn"
68
+ triggers = "trg"
69
+ antonyms = "ant"
70
+ hypernyms = "spc"
71
+ hyponyms = "gen"
72
+ holonyms = "com"
73
+ meronyms = "par"
74
+ frequent_followers = "bga"
75
+ frequent_predecessors = "bgb"
76
+ homophones = "hom"
77
+ consonant = "cns"
78
+
79
+
80
+ class WordMetadata(StrEnum):
81
+ """
82
+ Extra lexical knowledge for a `Word`
83
+ """
84
+
85
+ definitions = "d"
86
+ parts_of_speech = "p"
87
+ syllable_count = "s"
88
+
89
+
90
class WordObject(TypedDict):
    """
    One entry of a Datamuse API response.

    Only `word` is always present; the remaining keys are optional.
    """

    # the word itself
    word: Word
    # optional list of definition strings
    defs: NotRequired[list[str]]
    # optional list of tag strings
    tags: NotRequired[list[str]]
    # optional syllable count
    numSyllables: NotRequired[int]


# A whole API response: a list of word entries.
WordArray: TypeAlias = list[WordObject]
@@ -0,0 +1,199 @@
1
+ import functools
2
+ from types import MappingProxyType
3
+ from typing import Any, final
4
+
5
+ import certifi
6
+ import urllib3
7
+
8
+ from datamuse.annotations import (
9
+ MetadataFlag,
10
+ RelatedWordCode,
11
+ Word,
12
+ WordArray,
13
+ WordObject,
14
+ _lookup_metadata_flag,
15
+ _lookup_related_code,
16
+ )
17
+
18
+
19
+ @final
20
+ class Datamuse:
21
+ """
22
+ The [Datamuse API](https://www.datamuse.com/api/) is a word-finding query engine for developers.
23
+
24
+ Use it in your apps to find words that match a given set of constraints and that are likely in a given context.
25
+ Specify a wide variety of constraints on meaning, spelling, sound, and vocabulary in your queries, in any combination.
26
+ """
27
+
28
+ __API_URL = "api.datamuse.com"
29
+ __slots__ = ("__pool", "__metadata_flags", "__metadata", "_metadata")
30
+
31
+ def __init__(self) -> None:
32
+ self.__pool = urllib3.HTTPSConnectionPool(
33
+ host=self.__API_URL,
34
+ port=443,
35
+ cert_reqs="CERT_REQUIRED",
36
+ ca_certs=certifi.where(),
37
+ )
38
+ self.__metadata_flags: dict[str, str] = {}
39
+ self.__metadata: dict[str, dict[str, Any]] = {}
40
+ self._metadata = MappingProxyType(self.__metadata)
41
+
42
+ @property
43
+ def metadata(self):
44
+ """
45
+ A mapping of a word to its metadata.
46
+ """
47
+ return self._metadata
48
+
49
+ def _get_words(self, **kwds: Word):
50
+ parsed = self.__get("/words", **kwds, **self.__metadata_flags)
51
+ self.__metadata_flags.clear()
52
+ return parsed
53
+
54
+ def _get_suggestions(self, **kwds: Word):
55
+ parsed = self.__get("/sug", **kwds, **self.__metadata_flags)
56
+ self.__metadata_flags.clear()
57
+ return parsed
58
+
59
+ @functools.lru_cache
60
+ def __get(self, url: str, **kwds: Word) -> list[Word]:
61
+ json_response = self.__pool.request(method="GET", url=url, fields=kwds).json()
62
+ words = self._make_metadata(json_response)
63
+ return words or [obj["word"] for obj in json_response]
64
+
65
+ def _make_metadata(self, json_response: WordArray) -> list[Word]:
66
+ """
67
+ Builds a `metadata` dict by parsing the JSON Reponse, returing a flattened list of string.
68
+
69
+ The `metadata` is updated per parsing.
70
+ The keys may not be same across parsing since it depends on the
71
+ metdata flags with which the API call was made.
72
+
73
+ :param json_response: The json response from the datamuse api
74
+ :type json_response: WordArray
75
+ :return: A flattened list of string
76
+ :rtype: list[Word]
77
+ """
78
+ words = []
79
+
80
+ if self.__metadata_flags:
81
+ flags = self.__metadata_flags["md"]
82
+
83
+ for obj in json_response:
84
+ word = obj["word"]
85
+ words.append(word)
86
+
87
+ if word not in self.__metadata:
88
+ self.__metadata[word] = {}
89
+
90
+ for f in flags:
91
+ match f:
92
+ case "d":
93
+ self._make_definitions(obj)
94
+ case "p":
95
+ self._make_parts_of_speech(obj)
96
+ case "s":
97
+ self._make_syllable_count(obj)
98
+ case "_": # pragma: no cover
99
+ # TODO: support remaining documenetd metadata flags
100
+ continue
101
+ return words
102
+
103
+ def _make_definitions(self, obj: WordObject, /):
104
+ # TODO: what is the `defHeadWord` in api response?
105
+ self.__metadata[obj["word"]].update(
106
+ definitions=list(map(str.expandtabs, obj.get("defs", [])))
107
+ )
108
+
109
+ def _make_syllable_count(self, obj: WordObject, /):
110
+ self.__metadata[obj["word"]].update(syllable_count=obj.get("numSyllables", 0))
111
+
112
+ def _make_parts_of_speech(self, obj: WordObject, /):
113
+ self.__metadata[obj["word"]].update(parts_of_speech=[])
114
+ for t in obj.get("tags", []):
115
+ match t:
116
+ case "n":
117
+ self.__metadata[obj["word"]]["parts_of_speech"].append("noun")
118
+ case "v":
119
+ self.__metadata[obj["word"]]["parts_of_speech"].append("verb")
120
+ case "adj":
121
+ self.__metadata[obj["word"]]["parts_of_speech"].append("adjective")
122
+ case "adv":
123
+ self.__metadata[obj["word"]]["parts_of_speech"].append("adverb")
124
+ case _:
125
+ pass
126
+
127
+ def synonyms(self, ml: Word):
128
+ """
129
+ words with a meaning similar to `ml`
130
+
131
+ :param ml: means like
132
+ """
133
+ return self._get_words(ml=ml)
134
+
135
+ def associations(self, ml: Word, start: Word = "*", end: Word = "*"):
136
+ """
137
+ words related to `ml`
138
+
139
+ :param ml: means like
140
+ :param start: start with
141
+ :param end: end in
142
+ """
143
+ return self._get_words(ml=ml, sp=start + end)
144
+
145
+ def homophones(self, sl: Word):
146
+ """
147
+ words that sound like `sl`
148
+
149
+ :param sl: sounds like
150
+ """
151
+ return self._get_words(sl=sl)
152
+
153
+ def pattern(self, start: Word, end: Word, letters: int):
154
+ """
155
+ words that start with `start`, end in `end`, and have `letters` in between
156
+
157
+ :param start: start with
158
+ :param end: end in
159
+ :param letters: letters in between
160
+ """
161
+ return self._get_words(sp=f"{start[0]}{'?' * letters}{end[0]}")
162
+
163
+ def orthographic_neighbours(self, sp: Word):
164
+ """
165
+ words that are spelled similarly to `sp`
166
+
167
+ :param sp: spelled like
168
+ """
169
+ return self._get_words(sp=sp)
170
+
171
+ def related(self, word: Word, rel: RelatedWordCode):
172
+ """
173
+ words that are related by `rel`
174
+
175
+ :param word: the word
176
+ :param rel: related word
177
+ """
178
+ return self._get_words(**{f"rel_{_lookup_related_code[rel]}": word}) # pyright: ignore[reportArgumentType]
179
+
180
+ def suggestions(self, s: Word):
181
+ """
182
+ sugesstions from prefix hint string `s`
183
+
184
+ :param s: prefix hint string
185
+ """
186
+ return self._get_suggestions(s=s)
187
+
188
+ def with_metadata(self, *md: MetadataFlag):
189
+ """
190
+ Include extra lexical knowledge for a `Word`.
191
+
192
+ Accessible through the `metadata` property.
193
+
194
+ :param md: the metadata
195
+ """
196
+ self.__metadata_flags.update(
197
+ md="".join({_lookup_metadata_flag[meta] for meta in md})
198
+ )
199
+ return self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamuse
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: datamuse is a simple wrapper around the datamuse api https://www.datamuse.com/api/
5
5
  Author-email: Bivas Kumar <thetrotfreak@yahoo.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datamuse"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  authors = [
5
5
  { name="Bivas Kumar", email="thetrotfreak@yahoo.com" },
6
6
  ]
@@ -1,6 +1,6 @@
1
1
  import pytest
2
2
 
3
- from datamuse.annotations import _lookup_related_code
3
+ from datamuse.annotations import _lookup_metadata_flag, _lookup_related_code
4
4
 
5
5
 
6
6
  class TestDatamuse:
@@ -83,3 +83,23 @@ class TestDatamuse:
83
83
  )
84
84
  related = mock.related(word_mock, parameter)
85
85
  assert word_mock in related
86
+
87
+ @pytest.mark.parametrize("flag", _lookup_metadata_flag)
88
+ def test_with_metadata_words(self, datamuse_mock, word_mock, flag):
89
+ mock = datamuse_mock(
90
+ method="GET",
91
+ url="/words",
92
+ response=[
93
+ {
94
+ "word": word_mock,
95
+ "tags": ["u", "n", "v", "adj", "adv"],
96
+ "defs": [word_mock],
97
+ "numSyllables": len(word_mock),
98
+ }
99
+ ],
100
+ match_query={"ml": word_mock, "md": f"{_lookup_metadata_flag[flag]}"},
101
+ )
102
+ synonyms = mock.with_metadata(flag).synonyms(word_mock)
103
+ assert word_mock in synonyms
104
+ assert word_mock in mock.metadata
105
+ assert flag in mock.metadata[word_mock]
@@ -1,40 +0,0 @@
1
- from types import MappingProxyType
2
- from typing import Literal, LiteralString, TypeAlias
3
-
4
- Word: TypeAlias = LiteralString
5
-
6
-
7
- RelatedWordCode = Literal[
8
- "nouns_adjective",
9
- "adjectives_noun",
10
- "synonyms",
11
- "triggers",
12
- "antonyms",
13
- "hypernyms",
14
- "hyponyms",
15
- "holonyms",
16
- "meronyms",
17
- "frequent_followers",
18
- "frequent_predecessors",
19
- "homophones",
20
- "consonant",
21
- ]
22
-
23
-
24
- _lookup_related_code = MappingProxyType(
25
- {
26
- "nouns_adjective": "jja",
27
- "adjectives_noun": "jjb",
28
- "synonyms": "syn",
29
- "triggers": "trg",
30
- "antonyms": "ant",
31
- "hypernyms": "spc",
32
- "hyponyms": "gen",
33
- "holonyms": "com",
34
- "meronyms": "par",
35
- "frequent_followers": "bga",
36
- "frequent_predecessors": "bgb",
37
- "homophones": "hom",
38
- "consonant": "cns",
39
- }
40
- )
@@ -1,99 +0,0 @@
1
- import functools
2
- from typing import final
3
-
4
- import certifi
5
- import urllib3
6
-
7
- from datamuse.annotations import RelatedWordCode, Word, _lookup_related_code
8
-
9
-
10
- @final
11
- class Datamuse:
12
- """
13
- The [Datamuse](https://www.datamuse.com/) [API](https://www.datamuse.com/api/) is a word-finding query engine for developers.
14
-
15
- You can use it in your apps to find words that match a given set of constraints and that are likely in a given context.
16
- You can specify a wide variety of constraints on meaning, spelling, sound, and vocabulary in your queries, in any combination.
17
- """
18
-
19
- __API_URL = "api.datamuse.com"
20
- __slots__ = ("__pool",)
21
-
22
- def __init__(self) -> None:
23
- self.__pool = urllib3.HTTPSConnectionPool(
24
- host=self.__API_URL,
25
- port=443,
26
- cert_reqs="CERT_REQUIRED",
27
- ca_certs=certifi.where(),
28
- )
29
-
30
- @functools.lru_cache
31
- def __get_words(self, **kwds: Word | RelatedWordCode) -> list[Word]:
32
- response = self.__pool.request(method="GET", url="/words", fields=kwds)
33
- return [word["word"] for word in response.json()]
34
-
35
- @functools.lru_cache
36
- def __get_suggestions(self, **kwds: Word | RelatedWordCode) -> list[Word]:
37
- response = self.__pool.request(method="GET", url="/sug", fields=kwds)
38
- return [word["word"] for word in response.json()]
39
-
40
- def synonyms(self, ml: Word):
41
- """
42
- words with a meaning similar to `ml`
43
-
44
- :param ml: means like
45
- """
46
- return self.__get_words(ml=ml)
47
-
48
- def associations(self, ml: Word, start: Word = "*", end: Word = "*"):
49
- """
50
- words related to `ml`
51
-
52
- :param ml: means like
53
- :param start: start with
54
- :param end: end in
55
- """
56
- return self.__get_words(ml=ml, sp=start + end)
57
-
58
- def homophones(self, sl: Word):
59
- """
60
- words that sound like `sl`
61
-
62
- :param sl: sounds like
63
- """
64
- return self.__get_words(sl=sl)
65
-
66
- def pattern(self, start: Word, end: Word, letters: int):
67
- """
68
- words that start with `start`, end in `end`, and have `letters` in between
69
-
70
- :param start: start with
71
- :param end: end in
72
- :param letters: letters in between
73
- """
74
- return self.__get_words(sp=f"{start[0]}{'?' * letters}{end[0]}")
75
-
76
- def orthographic_neighbours(self, sp: Word):
77
- """
78
- words that are spelled similarly to `sp`
79
-
80
- :param sp: spelled like
81
- """
82
- return self.__get_words(sp=sp)
83
-
84
- def related(self, word: Word, rel: RelatedWordCode):
85
- """
86
- words that are related by `rel`
87
-
88
- :param word: the word
89
- :param rel: related word
90
- """
91
- return self.__get_words(**{f"rel_{_lookup_related_code[rel]}": word})
92
-
93
- def suggestions(self, s: Word):
94
- """
95
- sugesstions from prefix hint string `s`
96
-
97
- :param s: prefix hint string
98
- """
99
- return self.__get_suggestions(s=s)
File without changes
File without changes
File without changes
File without changes