skrutable 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skrutable-2.0.2/PKG-INFO +32 -0
- skrutable-2.0.2/src/skrutable/__init__.py +1 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/splitting.py +25 -30
- skrutable-2.0.2/src/skrutable.egg-info/PKG-INFO +32 -0
- skrutable-2.0.0/PKG-INFO +0 -31
- skrutable-2.0.0/src/skrutable/__init__.py +0 -1
- skrutable-2.0.0/src/skrutable.egg-info/PKG-INFO +0 -31
- {skrutable-2.0.0 → skrutable-2.0.2}/LICENSE.md +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/README.md +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/setup.cfg +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/setup.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/config.json +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/config.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/manual.md +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/meter_identification.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/meter_patterns.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/phonemes.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scansion.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/transliteration.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/SOURCES.txt +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/top_level.txt +0 -0
skrutable-2.0.2/PKG-INFO
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: skrutable
|
|
3
|
+
Version: 2.0.2
|
|
4
|
+
Summary: skrutable library for working with Sanskrit text
|
|
5
|
+
Home-page: https://github.com/tylergneill/skrutable
|
|
6
|
+
Author: Tyler Neill
|
|
7
|
+
Author-email: tyler.g.neill@gmail.com
|
|
8
|
+
License: CC BY-SA 4.0
|
|
9
|
+
Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Provides-Extra: testing
|
|
12
|
+
License-File: LICENSE.md
|
|
13
|
+
|
|
14
|
+
# skrutable
|
|
15
|
+
|
|
16
|
+
A toolkit and online workbench providing
|
|
17
|
+
transliteration, scansion, and meter identification for Sanskrit text,
|
|
18
|
+
as well as access to powerful sandhi and compound splitting.
|
|
19
|
+
|
|
20
|
+
Web-app interface live online at [skrutable.info](https://www.skrutable.info).
|
|
21
|
+
|
|
22
|
+
Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
|
|
23
|
+
|
|
24
|
+
See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
|
|
25
|
+
|
|
26
|
+
Feedback welcome!
|
|
27
|
+
My name is Tyler
|
|
28
|
+
([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
|
|
29
|
+
[LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
|
|
30
|
+
and my Gmail is tyler.g.neill.
|
|
31
|
+
|
|
32
|
+
And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.0.2"
|
|
@@ -102,7 +102,12 @@ class Splitter(object):
|
|
|
102
102
|
sentence_results.append(new_sentence)
|
|
103
103
|
return sentence_results
|
|
104
104
|
|
|
105
|
-
def _get_dharmamitra_split(
|
|
105
|
+
def _get_dharmamitra_split(
|
|
106
|
+
self,
|
|
107
|
+
text_input: str,
|
|
108
|
+
preserve_compound_hyphens: bool=True,
|
|
109
|
+
batch_size: int=2000
|
|
110
|
+
) -> List[str]:
|
|
106
111
|
# TODO: change to "unsandhied-morphosyntax" when it works
|
|
107
112
|
mode = "unsandhied-lemma-morphosyntax" if preserve_compound_hyphens else "unsandhied"
|
|
108
113
|
"""
|
|
@@ -117,31 +122,31 @@ class Splitter(object):
|
|
|
117
122
|
headers = {
|
|
118
123
|
'Content-Type': 'application/json',
|
|
119
124
|
}
|
|
120
|
-
data = {
|
|
121
|
-
"input_sentence": text_input,
|
|
122
|
-
"mode": mode,
|
|
123
|
-
"input_encoding": "auto",
|
|
124
|
-
"human_readable_tags": False,
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
response = requests.post(url, headers=headers, json=data)
|
|
128
125
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
126
|
+
sentences = text_input.split('\n')
|
|
127
|
+
results = []
|
|
128
|
+
for i in range(0, len(sentences), batch_size):
|
|
129
|
+
sentence_batch = sentences[i:i + batch_size]
|
|
130
|
+
batch_text_input = '\n'.join(sentence_batch)
|
|
131
|
+
data = {
|
|
132
|
+
"input_sentence": batch_text_input,
|
|
133
|
+
"mode": mode,
|
|
134
|
+
"input_encoding": "auto",
|
|
135
|
+
"human_readable_tags": False,
|
|
136
|
+
}
|
|
137
|
+
response = requests.post(url, headers=headers, json=data)
|
|
138
|
+
if response.status_code != 200:
|
|
139
|
+
response.raise_for_status()
|
|
140
|
+
batch_result = self._parse_dharmamitra_result(response.json(), mode)
|
|
141
|
+
results.extend(batch_result)
|
|
142
|
+
|
|
143
|
+
return results
|
|
133
144
|
|
|
134
145
|
def _post_string_2018(self, input_text: str, url: str=SPLITTER_SERVER_URL):
|
|
135
146
|
json_payload = {'input_text': input_text}
|
|
136
147
|
result = requests.post(url, json=json_payload)
|
|
137
148
|
return result.text
|
|
138
149
|
|
|
139
|
-
def _post_file_2018(self, input_file_path: str, url: str=SPLITTER_SERVER_URL):
|
|
140
|
-
input_file = open(input_file_path, 'rb')
|
|
141
|
-
file_payload = {"input_file": input_file}
|
|
142
|
-
result = requests.post(url, files=file_payload)
|
|
143
|
-
return result.text
|
|
144
|
-
|
|
145
150
|
def _clean_up_2018(self, split_sentences_str: str, split_appearance: str=' ') -> List[str]:
|
|
146
151
|
for (r_1, r_2) in [
|
|
147
152
|
('-\n', '\n'), # remove line-final hyphens
|
|
@@ -176,7 +181,6 @@ class Splitter(object):
|
|
|
176
181
|
splitter_model: str='dharmamitra_2024_sept',
|
|
177
182
|
preserve_compound_hyphens: bool = PRESERVE_COMPOUND_HYPHENS_DEFAULT,
|
|
178
183
|
preserve_punctuation: bool=PRESERVE_PUNCTUATION_DEFAULT,
|
|
179
|
-
whole_file: bool=False,
|
|
180
184
|
) -> str:
|
|
181
185
|
"""
|
|
182
186
|
Splits sandhi and compounds of multi-line Sanskrit string,
|
|
@@ -213,16 +217,7 @@ class Splitter(object):
|
|
|
213
217
|
|
|
214
218
|
elif splitter_model == 'splitter_2018':
|
|
215
219
|
|
|
216
|
-
split_sentences_str
|
|
217
|
-
|
|
218
|
-
if whole_file:
|
|
219
|
-
# write prepared string to Splitter input buffer and send as binary
|
|
220
|
-
with open(SPLITTER_INPUT_BUFFER_FN, 'w') as f_out:
|
|
221
|
-
f_out.write(sentences_str)
|
|
222
|
-
split_sentences_str = self._post_file_2018(SPLITTER_INPUT_BUFFER_FN)
|
|
223
|
-
else:
|
|
224
|
-
split_sentences_str = self._post_string_2018(sentences_str)
|
|
225
|
-
|
|
220
|
+
split_sentences_str = self._post_string_2018(sentences_str)
|
|
226
221
|
split_sentences = self._clean_up_2018(split_sentences_str)
|
|
227
222
|
|
|
228
223
|
else:
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: skrutable
|
|
3
|
+
Version: 2.0.2
|
|
4
|
+
Summary: skrutable library for working with Sanskrit text
|
|
5
|
+
Home-page: https://github.com/tylergneill/skrutable
|
|
6
|
+
Author: Tyler Neill
|
|
7
|
+
Author-email: tyler.g.neill@gmail.com
|
|
8
|
+
License: CC BY-SA 4.0
|
|
9
|
+
Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Provides-Extra: testing
|
|
12
|
+
License-File: LICENSE.md
|
|
13
|
+
|
|
14
|
+
# skrutable
|
|
15
|
+
|
|
16
|
+
A toolkit and online workbench providing
|
|
17
|
+
transliteration, scansion, and meter identification for Sanskrit text,
|
|
18
|
+
as well as access to powerful sandhi and compound splitting.
|
|
19
|
+
|
|
20
|
+
Web-app interface live online at [skrutable.info](https://www.skrutable.info).
|
|
21
|
+
|
|
22
|
+
Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
|
|
23
|
+
|
|
24
|
+
See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
|
|
25
|
+
|
|
26
|
+
Feedback welcome!
|
|
27
|
+
My name is Tyler
|
|
28
|
+
([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
|
|
29
|
+
[LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
|
|
30
|
+
and my Gmail is tyler.g.neill.
|
|
31
|
+
|
|
32
|
+
And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
|
skrutable-2.0.0/PKG-INFO
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: skrutable
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: skrutable library for working with Sanskrit text
|
|
5
|
-
Home-page: https://github.com/tylergneill/skrutable
|
|
6
|
-
Author: Tyler Neill
|
|
7
|
-
Author-email: tyler.g.neill@gmail.com
|
|
8
|
-
License: CC BY-SA 4.0
|
|
9
|
-
Description: # skrutable
|
|
10
|
-
|
|
11
|
-
A toolkit and online workbench providing
|
|
12
|
-
transliteration, scansion, and meter identification for Sanskrit text,
|
|
13
|
-
as well as access to powerful sandhi and compound splitting.
|
|
14
|
-
|
|
15
|
-
Web-app interface live online at [skrutable.info](https://www.skrutable.info).
|
|
16
|
-
|
|
17
|
-
Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
|
|
18
|
-
|
|
19
|
-
See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
|
|
20
|
-
|
|
21
|
-
Feedback welcome!
|
|
22
|
-
My name is Tyler
|
|
23
|
-
([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
|
|
24
|
-
[LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
|
|
25
|
-
and my Gmail is tyler.g.neill.
|
|
26
|
-
|
|
27
|
-
And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
|
|
28
|
-
Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
|
|
29
|
-
Platform: UNKNOWN
|
|
30
|
-
Description-Content-Type: text/markdown
|
|
31
|
-
Provides-Extra: testing
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.0.0"
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: skrutable
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: skrutable library for working with Sanskrit text
|
|
5
|
-
Home-page: https://github.com/tylergneill/skrutable
|
|
6
|
-
Author: Tyler Neill
|
|
7
|
-
Author-email: tyler.g.neill@gmail.com
|
|
8
|
-
License: CC BY-SA 4.0
|
|
9
|
-
Description: # skrutable
|
|
10
|
-
|
|
11
|
-
A toolkit and online workbench providing
|
|
12
|
-
transliteration, scansion, and meter identification for Sanskrit text,
|
|
13
|
-
as well as access to powerful sandhi and compound splitting.
|
|
14
|
-
|
|
15
|
-
Web-app interface live online at [skrutable.info](https://www.skrutable.info).
|
|
16
|
-
|
|
17
|
-
Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
|
|
18
|
-
|
|
19
|
-
See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
|
|
20
|
-
|
|
21
|
-
Feedback welcome!
|
|
22
|
-
My name is Tyler
|
|
23
|
-
([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
|
|
24
|
-
[LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
|
|
25
|
-
and my Gmail is tyler.g.neill.
|
|
26
|
-
|
|
27
|
-
And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
|
|
28
|
-
Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
|
|
29
|
-
Platform: UNKNOWN
|
|
30
|
-
Description-Content-Type: text/markdown
|
|
31
|
-
Provides-Extra: testing
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|