skrutable 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. skrutable-2.0.2/PKG-INFO +32 -0
  2. skrutable-2.0.2/src/skrutable/__init__.py +1 -0
  3. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/splitting.py +25 -30
  4. skrutable-2.0.2/src/skrutable.egg-info/PKG-INFO +32 -0
  5. skrutable-2.0.0/PKG-INFO +0 -31
  6. skrutable-2.0.0/src/skrutable/__init__.py +0 -1
  7. skrutable-2.0.0/src/skrutable.egg-info/PKG-INFO +0 -31
  8. {skrutable-2.0.0 → skrutable-2.0.2}/LICENSE.md +0 -0
  9. {skrutable-2.0.0 → skrutable-2.0.2}/README.md +0 -0
  10. {skrutable-2.0.0 → skrutable-2.0.2}/setup.cfg +0 -0
  11. {skrutable-2.0.0 → skrutable-2.0.2}/setup.py +0 -0
  12. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/config.json +0 -0
  13. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/config.py +0 -0
  14. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/manual.md +0 -0
  15. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/meter_identification.py +0 -0
  16. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/meter_patterns.py +0 -0
  17. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/phonemes.py +0 -0
  18. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scansion.py +0 -0
  19. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_detection.py +0 -0
  20. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_maps.py +0 -0
  21. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/scheme_vectors_mbh.py +0 -0
  22. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/transliteration.py +0 -0
  23. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable/virAma_avoidance.py +0 -0
  24. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/SOURCES.txt +0 -0
  25. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/dependency_links.txt +0 -0
  26. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/requires.txt +0 -0
  27. {skrutable-2.0.0 → skrutable-2.0.2}/src/skrutable.egg-info/top_level.txt +0 -0
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.1
2
+ Name: skrutable
3
+ Version: 2.0.2
4
+ Summary: skrutable library for working with Sanskrit text
5
+ Home-page: https://github.com/tylergneill/skrutable
6
+ Author: Tyler Neill
7
+ Author-email: tyler.g.neill@gmail.com
8
+ License: CC BY-SA 4.0
9
+ Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
10
+ Description-Content-Type: text/markdown
11
+ Provides-Extra: testing
12
+ License-File: LICENSE.md
13
+
14
+ # skrutable
15
+
16
+ A toolkit and online workbench providing
17
+ transliteration, scansion, and meter identification for Sanskrit text,
18
+ as well as access to powerful sandhi and compound splitting.
19
+
20
+ Web-app interface live online at [skrutable.info](https://www.skrutable.info).
21
+
22
+ Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
23
+
24
+ See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
25
+
26
+ Feedback welcome!
27
+ My name is Tyler
28
+ ([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
29
+ [LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
30
+ and my Gmail is tyler.g.neill.
31
+
32
+ And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
@@ -0,0 +1 @@
1
+ __version__ = "2.0.2"
@@ -102,7 +102,12 @@ class Splitter(object):
102
102
  sentence_results.append(new_sentence)
103
103
  return sentence_results
104
104
 
105
- def _get_dharmamitra_split(self, text_input, preserve_compound_hyphens=True) -> List[str]:
105
+ def _get_dharmamitra_split(
106
+ self,
107
+ text_input: str,
108
+ preserve_compound_hyphens: bool=True,
109
+ batch_size: int=2000
110
+ ) -> List[str]:
106
111
  # TODO: change to "unsandhied-morphosyntax" when it works
107
112
  mode = "unsandhied-lemma-morphosyntax" if preserve_compound_hyphens else "unsandhied"
108
113
  """
@@ -117,31 +122,31 @@ class Splitter(object):
117
122
  headers = {
118
123
  'Content-Type': 'application/json',
119
124
  }
120
- data = {
121
- "input_sentence": text_input,
122
- "mode": mode,
123
- "input_encoding": "auto",
124
- "human_readable_tags": False,
125
- }
126
-
127
- response = requests.post(url, headers=headers, json=data)
128
125
 
129
- if response.status_code != 200:
130
- response.raise_for_status()
131
-
132
- return self._parse_dharmamitra_result(response.json(), mode)
126
+ sentences = text_input.split('\n')
127
+ results = []
128
+ for i in range(0, len(sentences), batch_size):
129
+ sentence_batch = sentences[i:i + batch_size]
130
+ batch_text_input = '\n'.join(sentence_batch)
131
+ data = {
132
+ "input_sentence": batch_text_input,
133
+ "mode": mode,
134
+ "input_encoding": "auto",
135
+ "human_readable_tags": False,
136
+ }
137
+ response = requests.post(url, headers=headers, json=data)
138
+ if response.status_code != 200:
139
+ response.raise_for_status()
140
+ batch_result = self._parse_dharmamitra_result(response.json(), mode)
141
+ results.extend(batch_result)
142
+
143
+ return results
133
144
 
134
145
  def _post_string_2018(self, input_text: str, url: str=SPLITTER_SERVER_URL):
135
146
  json_payload = {'input_text': input_text}
136
147
  result = requests.post(url, json=json_payload)
137
148
  return result.text
138
149
 
139
- def _post_file_2018(self, input_file_path: str, url: str=SPLITTER_SERVER_URL):
140
- input_file = open(input_file_path, 'rb')
141
- file_payload = {"input_file": input_file}
142
- result = requests.post(url, files=file_payload)
143
- return result.text
144
-
145
150
  def _clean_up_2018(self, split_sentences_str: str, split_appearance: str=' ') -> List[str]:
146
151
  for (r_1, r_2) in [
147
152
  ('-\n', '\n'), # remove line-final hyphens
@@ -176,7 +181,6 @@ class Splitter(object):
176
181
  splitter_model: str='dharmamitra_2024_sept',
177
182
  preserve_compound_hyphens: bool = PRESERVE_COMPOUND_HYPHENS_DEFAULT,
178
183
  preserve_punctuation: bool=PRESERVE_PUNCTUATION_DEFAULT,
179
- whole_file: bool=False,
180
184
  ) -> str:
181
185
  """
182
186
  Splits sandhi and compounds of multi-line Sanskrit string,
@@ -213,16 +217,7 @@ class Splitter(object):
213
217
 
214
218
  elif splitter_model == 'splitter_2018':
215
219
 
216
- split_sentences_str: str
217
-
218
- if whole_file:
219
- # write prepared string to Splitter input buffer and send as binary
220
- with open(SPLITTER_INPUT_BUFFER_FN, 'w') as f_out:
221
- f_out.write(sentences_str)
222
- split_sentences_str = self._post_file_2018(SPLITTER_INPUT_BUFFER_FN)
223
- else:
224
- split_sentences_str = self._post_string_2018(sentences_str)
225
-
220
+ split_sentences_str = self._post_string_2018(sentences_str)
226
221
  split_sentences = self._clean_up_2018(split_sentences_str)
227
222
 
228
223
  else:
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.1
2
+ Name: skrutable
3
+ Version: 2.0.2
4
+ Summary: skrutable library for working with Sanskrit text
5
+ Home-page: https://github.com/tylergneill/skrutable
6
+ Author: Tyler Neill
7
+ Author-email: tyler.g.neill@gmail.com
8
+ License: CC BY-SA 4.0
9
+ Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
10
+ Description-Content-Type: text/markdown
11
+ Provides-Extra: testing
12
+ License-File: LICENSE.md
13
+
14
+ # skrutable
15
+
16
+ A toolkit and online workbench providing
17
+ transliteration, scansion, and meter identification for Sanskrit text,
18
+ as well as access to powerful sandhi and compound splitting.
19
+
20
+ Web-app interface live online at [skrutable.info](https://www.skrutable.info).
21
+
22
+ Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
23
+
24
+ See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
25
+
26
+ Feedback welcome!
27
+ My name is Tyler
28
+ ([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
29
+ [LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
30
+ and my Gmail is tyler.g.neill.
31
+
32
+ And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
skrutable-2.0.0/PKG-INFO DELETED
@@ -1,31 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: skrutable
3
- Version: 2.0.0
4
- Summary: skrutable library for working with Sanskrit text
5
- Home-page: https://github.com/tylergneill/skrutable
6
- Author: Tyler Neill
7
- Author-email: tyler.g.neill@gmail.com
8
- License: CC BY-SA 4.0
9
- Description: # skrutable
10
-
11
- A toolkit and online workbench providing
12
- transliteration, scansion, and meter identification for Sanskrit text,
13
- as well as access to powerful sandhi and compound splitting.
14
-
15
- Web-app interface live online at [skrutable.info](https://www.skrutable.info).
16
-
17
- Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
18
-
19
- See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
20
-
21
- Feedback welcome!
22
- My name is Tyler
23
- ([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
24
- [LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
25
- and my Gmail is tyler.g.neill.
26
-
27
- And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
28
- Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
29
- Platform: UNKNOWN
30
- Description-Content-Type: text/markdown
31
- Provides-Extra: testing
@@ -1 +0,0 @@
1
- __version__ = "2.0.0"
@@ -1,31 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: skrutable
3
- Version: 2.0.0
4
- Summary: skrutable library for working with Sanskrit text
5
- Home-page: https://github.com/tylergneill/skrutable
6
- Author: Tyler Neill
7
- Author-email: tyler.g.neill@gmail.com
8
- License: CC BY-SA 4.0
9
- Description: # skrutable
10
-
11
- A toolkit and online workbench providing
12
- transliteration, scansion, and meter identification for Sanskrit text,
13
- as well as access to powerful sandhi and compound splitting.
14
-
15
- Web-app interface live online at [skrutable.info](https://www.skrutable.info).
16
-
17
- Install package from [PyPi](https://pypi.org/project/skrutable/) with `pip install skrutable`.
18
-
19
- See [manual.md](https://github.com/tylergneill/skrutable/blob/main/src/skrutable/manual.md) for instructions.
20
-
21
- Feedback welcome!
22
- My name is Tyler
23
- ([Academia](https://uni-leipzig1.academia.edu/TylerNeill),
24
- [LinkedIn](https://www.linkedin.com/in/tyler-g-neill/))
25
- and my Gmail is tyler.g.neill.
26
-
27
- And please share and share-alike! Licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
28
- Keywords: Sanskrit text transliteration scansion meter identification sandhi compound splitting
29
- Platform: UNKNOWN
30
- Description-Content-Type: text/markdown
31
- Provides-Extra: testing
File without changes
File without changes
File without changes
File without changes