PyPI - invesytoolbox - Versions diffs - 0.0.22__tar.gz → 0.0.23__tar.gz - Mend

invesytoolbox 0.0.22tar.gz → 0.0.23tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/HISTORY.md RENAMED Viewed

@@ -1,4 +1,13 @@
 # History
+## 0.0.23 (2024-07-22)
+* **adjust_spaces_on_punctuation**: inserts narrow (or normal) spaces before or after punctuation marks for certain languages
+* **value_2datatype**:
+  - default fmt changed to None (hopefully it doesn't break somewhere, but this is necessary)
+  - 'pendulum' is now an option for the parameter **dt**
+- default for parameter **fmt** changed to None (this is necessary for the new pendulum conversion in **value_2datatype** to work) for
+	- **dict_2datatypes**
+	- **dictlist_2datatypes**
 ## 0.0.22 (2023-10-14)
 * **normalize_name** and **could_be_a_name** now have the boolean parameter *lastname* that indicates that a single-word name is to be treated as a last name, not a first name.
 * **get_locale**: locale can be only the language, without the country.

{invesytoolbox-0.0.22/src/invesytoolbox.egg-info → invesytoolbox-0.0.23}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: invesytoolbox
-Version: 0.0.22
+Version: 0.0.23
 Summary: Tools for Python scripts or terminal
 Home-page: https://gitlab.com/Rastaf/invesytoolbox
 Author: Georg Pfolz
@@ -44,6 +44,15 @@ That's also why all date and time functions also take into account the old DateT
 The documentation can be found [here](https://rastaf.gitlab.io/invesytoolbox/).
 # History
+## 0.0.23 (2024-07-22)
+* **adjust_spaces_on_punctuation**: inserts narrow (or normal) spaces before or after punctuation marks for certain languages
+* **value_2datatype**:
+  - default fmt changed to None (hopefully it doesn't break somewhere, but this is necessary)
+  - 'pendulum' is now an option for the parameter **dt**
+- default for parameter **fmt** changed to None (this is necessary for the new pendulum conversion in **value_2datatype** to work) for
+	- **dict_2datatypes**
+	- **dictlist_2datatypes**
 ## 0.0.22 (2023-10-14)
 * **normalize_name** and **could_be_a_name** now have the boolean parameter *lastname* that indicates that a single-word name is to be treated as a last name, not a first name.
 * **get_locale**: locale can be only the language, without the country.

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = invesytoolbox
-version = 0.0.22
+version = 0.0.23
 author = Georg Pfolz
 author_email = georg.pfolz@invesy.at
 description = Tools for Python scripts or terminal

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/invesytoolbox/itb_data.py RENAMED Viewed

@@ -8,6 +8,7 @@ from typing import Union, List, Dict, Any, Optional
 import DateTime
 import datetime
 import vobject
+import pendulum
 from dateutil.parser import parse
 from itb_date_time import convert_datetime, str_to_date, str_to_DT, get_dateformat
 from itb_email_phone import process_phonenumber
@@ -38,9 +39,7 @@ def dict_from_dict_list(
     single_value: Optional[bool] = None,
     include_key: Optional[bool] = False
 ) -> dict:
-    """Create a dictionary from a list of dictionaries
-    .. note:: The annotated return type is not true if a single_value argument is given.
+    """ Create a dictionary from a list of dictionaries
     :param dictList: the list of dictionaries to be converted
     :param key: the key of the dictionaries to use as key for the new dictionary.
@@ -70,7 +69,7 @@ def create_vcard(
     data: Dict[str, str],
     returning: str = 'vcard'
 ) -> str:
-    """create a vCard from a dictionary
+    """ create a vCard from a dictionary
     .. note:: The serialized vcard has Windows line-breaks, which is fine, I guess
     .. note:: colons in notes are escaped in vobject
@@ -134,7 +133,7 @@ def dict_2unicode(
     d: Dict[Union[str, bytes], Any],
     encoding: str = 'utf-8'
 ) -> dict:
-    """ Converts all keys and values in a dictionary from bytes to unicode
+    """ Convert all keys and values in a dictionary from bytes to unicode
     All keys and values are changed from bytes to unicode, if applicable.
     Other data types are left unchanged, including other compound data types.
@@ -156,9 +155,27 @@ def dict_2datatypes(
     convert_keys: bool = False,
     convert_to_unicode: bool = False,
     dt: str = 'datetime',
-    fmt: str = '%d.%m.%Y'
+    fmt: str = None
 ) -> Dict[Any, Any]:
-    """ convert all data of a dictionary to specific (json metadata) or guessed types """
+    """ Convert all data of a dictionary to specific (json metadata) or guessed types
+    :param d: dictionary
+    :param metadata: dictionary containing types for the keys:
+               - boolean
+               - int
+               - float
+               - date
+               - datetime: date and time
+               - time: conversion or not based on dt
+    :param convert_keys: convert keys to unicode
+    :param convert_to_unicode: convert all bytes to unicode in the process
+    :param dt: - datetime or dt
+               - DateTime or DT
+               - ignore or string
+               - pendulum
+               - default: datetime, because it's Python's default
+    :param fmt: format for datetime parsing
+    """
     if convert_keys:
         return {
@@ -197,15 +214,35 @@ def dictlist_2datatypes(
     convert_keys: bool = False,
     convert_to_unicode: bool = False,
     dt: str = 'datetime',
-    fmt: str = '%d.%m.%Y'
+    fmt: str = None
 ) -> list:
+    """ Convert all data of a list of dictionaries to specific (json metadata) or guessed types
+    :param dictList: list of dictionaries
+    :param metadata: dictionary containing types for the keys:
+                - boolean
+                - int
+                - float
+                - date
+                - datetime: date and time
+                - time: conversion or not based on dt
+    :param convert_keys: convert keys to unicode
+    :param convert_to_unicode: convert all bytes to unicode in the process
+    :param dt: - datetime or dt
+               - DateTime or DT
+               - ignore or string
+               - pendulum
+               - default: datetime, because it's Python's default
+    :param fmt: format for datetime parsing
+    """
     return [
         dict_2datatypes(
             d=dic,
             metadata=metadata,
             convert_keys=convert_keys,
             convert_to_unicode=convert_to_unicode,
-            dt=dt
+            dt=dt,
+            fmt=fmt
         )
         for dic
         in dictlist
@@ -258,17 +295,18 @@ def value_2datatype(
     convert_to_unicode: bool = False,
     encoding: str = 'utf-8',
     dt: str = 'datetime',
-    fmt: str = '%d.%m.%Y',  # only for datetime
-    UTC: bool = False
+    fmt: str = None,
+    timezone: str = 'local'
 ) -> Union[str, int, float]:
     """ Convert a value to a datatype
     this function has two modes:
-    1. it is provided a type for the conversion
+    1. a type is provided for the conversion
     2. it makes educated guesses in converting a string
     :param value:
+    :param typ: type for the conversion
     :param metadata: dictionary containing types for the keys:
                - boolean
                - int
@@ -281,7 +319,10 @@ def value_2datatype(
     :param encoding: if bytes are present, use this encoding to convert them to unicode
     :param dt: - datetime or dt
                - DateTime or DT
+               - ignore or string
+               - pendulum
                - default: datetime, because it's Python's default
+    :param timezone: default is 'local', other options are 'UTC' or 'Europe/Vienna'
     """
     if typ:
         if typ == 'boolean':
@@ -311,6 +352,17 @@ def value_2datatype(
                     value,
                     datefmt='international'
                 ) + 0.5  # to avoid day-shifting due to timezones, use 12am
+            elif dt == 'pendulum':
+                if fmt:
+                    p_dt = pendulum.from_format(value, fmt)
+                else:
+                    p_dt = pendulum.parse(value)
+                if p_dt.tzinfo == 'UTC' and timezone == 'local':
+                    return p_dt.in_tz(pendulum.local_timezone())
+                elif timezone:
+                    return p_dt.in_tz(timezone)
+                return p_dt.date()
             elif dt in ('ignore', 'string'):
                 return value
             else:
@@ -327,6 +379,17 @@ def value_2datatype(
                     )
                 except DateTime.interfaces.SyntaxError:
                     return value
+            elif dt == 'pendulum':
+                if fmt:
+                    p_dt = pendulum.from_format(value, fmt)
+                else:
+                    p_dt = pendulum.parse(value)
+                if p_dt.tzinfo == 'UTC' and timezone == 'local':
+                    return p_dt.in_tz(pendulum.local_timezone())
+                elif timezone:
+                    return p_dt.in_tz(timezone)
+                return p_dt
             elif dt in ('ignore', 'string'):
                 return value
             else:
@@ -339,7 +402,6 @@ def value_2datatype(
                 return value  # DT: return the string
     else:
         if isinstance(value, (bool, int, float)):
             return value
@@ -373,12 +435,7 @@ def value_2datatype(
                     datestring=value,
                     checkonly=True
                 ):
-                    if ':' in value:
-                        return convert_datetime(
-                            date=value,
-                            convert_to=dt
-                        )
-                    elif dt in ('datetime', 'dt'):
+                    if dt in ('datetime', 'dt'):
                         return str_to_date(
                             datestring=value
                         )
@@ -386,6 +443,22 @@ def value_2datatype(
                         return str_to_DT(
                             datestring=value
                         )
+                    elif dt == 'pendulum':
+                        if fmt:
+                            p_dt = pendulum.from_format(value, fmt)
+                        else:
+                            p_dt = pendulum.parse(value)
+                        if p_dt.tzinfo == 'UTC' and timezone == 'local':
+                            return p_dt.in_tz(pendulum.local_timezone())
+                        elif timezone:
+                            return p_dt.in_tz(timezone)
+                        return p_dt
+                    else:
+                        return convert_datetime(
+                            date=value,
+                            convert_to=dt
+                        )
             except Exception:
                 # in case it's a subset of datetime_chars but not a date or datetime or time

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/invesytoolbox/itb_date_time.py RENAMED Viewed

@@ -37,6 +37,8 @@ def is_valid_datetime_string(
     a conflict with 4-digit year values, it is considered invalid
     :param returning: can return a tuple
+    .. note:: This does not work with ISO8601 strings! (returns False)
     """
     return str_to_dt(
         datestring=datestring,
@@ -57,6 +59,14 @@ def get_dateformat(
     :todo: times!
     :raises ValueError: if an invalid datetime or DateTime string is provided
     """
+    # first let's check for ISO8601 strings
+    if checkonly:
+        try:
+            pendulum.parse(datestring)
+            return True
+        except pendulum.parsing.exceptions.ParserError:
+            pass  # don't return False, because it may be a valid date string
     timefmt = ''
     if ' ' in datestring:
         try:
@@ -225,7 +235,7 @@ def str_to_pendulum(
     if not fmt:
         return pendulum.parse(datestring)
     else:
-        return pendulum.datetime.strptime(datestring, fmt)
+        return pendulum.from_format(datestring, fmt)
 def str_to_date(

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/invesytoolbox/itb_html.py RENAMED Viewed

@@ -15,7 +15,7 @@ def prettify_html(
     .. note:: This function is needed for Zope Python Scripts
         because even if bs4 can be imported, prettify throws
-        an Unauthorized error in restricted Python
+        an Unauthorized error in Restricted Python
     """
     return BeautifulSoup(html, "html.parser").prettify()

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/invesytoolbox/itb_text_name.py RENAMED Viewed

@@ -14,7 +14,7 @@ from nameparser import HumanName
 d = gender.Detector(case_sensitive=False)
-char_nb_map = {
+CHAR_NB_MAP = {
     'a': ('a', '4', '@'),
     'b': ('b', '6', '8', 'I3', '13', '!3'),
     'c': ('c', '('),
@@ -43,6 +43,54 @@ char_nb_map = {
     'z': ('z', '2', '7_')
 }
+# per-language regex patterns and replacement templates (narrow/normal space) for punctuation spacing
+LANG_PUNCT_SPACES = {
+    'fr': {
+        'regex': r'([!?:;%])',
+        'narrow': '\u202f\\1',
+        'normal': r' \1'
+    },
+    'es': {
+        'regex': r'([¿¡])',
+        'narrow': '\\1\u202f',
+        'normal': r'\1 '
+    },
+    'ro': {
+        'regex': r'([!?:;])',
+        'narrow': '\u202f\\1',
+        'normal': r' \1'
+    }
+}
+def adjust_spaces_on_punctuation(
+    text: str,
+    language: str,
+    space: str = 'narrow'
+):
+    """
+    Adjust spaces before or after punctuation marks according to the language.
+    By default the narrow space (\u202f) is used for adjustments.
+    """
+    # Step 1: Remove existing spaces before the punctuation
+    text = re.sub(r'\s+([!?:;%\),.])', r'\1', text)
+    lang_data = LANG_PUNCT_SPACES.get(language)
+    if lang_data:
+        # Step 2: Insert the selected space (narrow \u202f by default, or normal) before or after the punctuation, as defined for the language
+        try:
+            text = re.sub(
+                lang_data['regex'],
+                lang_data[space],
+                text
+            )
+        except re.error as e:
+            raise Exception(f"Error in regex for language {language}: {e}")
+    else:
+        # Original behavior for other languages
+        text = re.sub(r'\s+([,.!?;:\)])', r'\1', text)
+    return text
 def and_list(
     elements: list,
@@ -202,7 +250,7 @@ def leet(
         if not c.isalnum():
             continue  # without counting
-        c = random.choice(char_nb_map[c])
+        c = random.choice(CHAR_NB_MAP[c])
         if not random.randrange(change_uppercase):
             c = c.upper()

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23/src/invesytoolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: invesytoolbox
-Version: 0.0.22
+Version: 0.0.23
 Summary: Tools for Python scripts or terminal
 Home-page: https://gitlab.com/Rastaf/invesytoolbox
 Author: Georg Pfolz
@@ -44,6 +44,15 @@ That's also why all date and time functions also take into account the old DateT
 The documentation can be found [here](https://rastaf.gitlab.io/invesytoolbox/).
 # History
+## 0.0.23 (2024-07-22)
+* **adjust_spaces_on_punctuation**: inserts narrow (or normal) spaces before or after punctuation marks for certain languages
+* **value_2datatype**:
+  - default fmt changed to None (hopefully it doesn't break somewhere, but this is necessary)
+  - 'pendulum' is now an option for the parameter **dt**
+- default for parameter **fmt** changed to None (this is necessary for the new pendulum conversion in **value_2datatype** to work) for
+	- **dict_2datatypes**
+	- **dictlist_2datatypes**
 ## 0.0.22 (2023-10-14)
 * **normalize_name** and **could_be_a_name** now have the boolean parameter *lastname* that indicates that a single-word name is to be treated as a last name, not a first name.
 * **get_locale**: locale can be only the language, without the country.

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/tests/test_data.py RENAMED Viewed

@@ -8,18 +8,21 @@ import sys
 import unittest
 import datetime
 import DateTime
+import pendulum
 from dateutil.parser import parse
 sys.path.append(".")
-from itb_data import \
-    any_2boolean, \
-    create_vcard, \
-    dict_2unicode, \
-    dict_2datatypes, \
-    dict_from_dict_list, \
-    dictlist_2datatypes, \
-    sort_dictlist
+from itb_data import (
+    any_2boolean,
+    create_vcard,
+    dict_2unicode,
+    dict_2datatypes,
+    dict_from_dict_list,
+    dictlist_2datatypes,
+    sort_dictlist,
+    value_2datatype
+)
 dict_list = [
     {'a': 1, 'b': 2, 'c': 5},
@@ -154,6 +157,9 @@ END:VCARD
 class TestData(unittest.TestCase):
+    # verbose, show complete diff
+    maxDiff = None
     def test_any_2boolean(self):
         for a, b in test_boolean.items():
             self.assertEqual(
@@ -260,7 +266,28 @@ class TestData(unittest.TestCase):
         )
     def test_value_2datatype(self):
-        pass  # this is already tested extensively in test_dict_2datatypes
+        # this is already tested extensively in test_dict_2datatypes
+        # so we just test pendulum here
+        print('test_value_2datatype: this should result in pendulum datetimes')
+        value = '01.04.2022 10:32'
+        fmt = 'DD.MM.YYYY HH:mm'
+        p = value_2datatype(
+            value=value,
+            typ='datetime',
+            dt='pendulum',
+            fmt=fmt
+        )
+        print(value, '→', p, p.tzinfo)
+        value = '2024-04-01T10:32:00'
+        p = value_2datatype(
+            value=value,
+            dt='pendulum'
+        )
+        print(value, '→', p, p.tzinfo)
     def test_sort_dictlist(self):
         sorted_dict_list = sort_dictlist(
@@ -297,6 +324,10 @@ class TestData(unittest.TestCase):
         )
     def test_dictlist_2datatypes(self):
+        # print('dict_for_testing', dict_for_testing)
+        # print('dict_for_testing_datatyped_unicoded', dict_for_testing_datatyped_unicoded)
         dictlist_for_testing = [
             dict_for_testing,
             dict_for_testing,
@@ -323,6 +354,16 @@ class TestData(unittest.TestCase):
             dictlist_tested_datatyped
         )
+        dictlist_pendulum = dictlist_2datatypes(
+            dictlist=dictlist_for_testing,
+            convert_keys=True,
+            convert_to_unicode=True,
+            dt='pendulum',
+            fmt='DD.MM.YYYY'
+        )
+        print('dictlist_pendulum', dictlist_pendulum)
 if __name__ == '__main__':
     unittest.main()

{invesytoolbox-0.0.22 → invesytoolbox-0.0.23}/src/tests/test_text_name.py RENAMED Viewed

@@ -11,6 +11,7 @@ import random
 sys.path.append(".")
 from itb_text_name import (
+    adjust_spaces_on_punctuation,
     and_list,
     capitalize_name,
     get_gender,
@@ -295,8 +296,40 @@ reference_names_sorted = [
 lower_text = 'das ist ein Beispiel-Text, der kapitalisiert werden kann.'
+# Language Texts (for adjusting spaces on the punctuation)
+punct_texts = {
+    'de': "Hallo Welt ! Wie geht es Ihnen ? Ich hoffe, Sie haben einen schönen Tag : Das Wetter ist herrlich . Wussten Sie, dass 75 % aller Statistiken erfunden sind?",
+    'en': "Hello world ! How are you today ? I hope you're having a great day : The weather is lovely . Did you know that 80 % of all statistics are made up?",
+    'fr': "Bonjour le monde! Comment allez-vous aujourd'hui? J'espère que vous passez une bonne journée: Le temps est magnifique . Saviez-vous que 85% des statistiques sont inventées?",
+    'es': "¡Hola mundo! ¿Cómo estás hoy ? Espero que tengas un buen día : El clima es hermoso. ¿Sabías que el 90 % de las estadísticas son inventadas ?",
+    'ro': "Salut lume! Cum ești azi? Sper că ai o zi bună: Vremea este minunată.Știai că 95 % din statistici sunt inventate?"
+}
+punct_texts_corrected = {
+    'de': "Hallo Welt! Wie geht es Ihnen? Ich hoffe, Sie haben einen schönen Tag: Das Wetter ist herrlich. Wussten Sie, dass 75% aller Statistiken erfunden sind?",
+    'en': "Hello world! How are you today? I hope you're having a great day: The weather is lovely. Did you know that 80% of all statistics are made up?",
+    'fr': "Bonjour le monde\u202f! Comment allez-vous aujourd'hui\u202f? J'espère que vous passez une bonne journée\u202f: Le temps est magnifique. Saviez-vous que 85\u202f% des statistiques sont inventées\u202f?",
+    'es': "¡\u202fHola mundo! ¿\u202fCómo estás hoy? Espero que tengas un buen día: El clima es hermoso. ¿\u202fSabías que el 90% de las estadísticas son inventadas?",
+    'ro': "Salut lume\u202f! Cum ești azi\u202f? Sper că ai o zi bună\u202f: Vremea este minunată.Știai că 95% din statistici sunt inventate\u202f?"
+}
 class TestTextName(unittest.TestCase):
+    def test_adjust_spaces_on_punctuation(self):
+        for lang, text in punct_texts.items():
+            print(f'Language: {lang}')
+            try:
+                corrected_text = adjust_spaces_on_punctuation(
+                    text = text,
+                    language = lang
+                )
+            except Exception as e:
+                print(f'Error: {e}')
+                raise AssertionError(f'Error: {e}')
+            self.assertEqual(
+                corrected_text,
+                punct_texts_corrected[lang]
+            )
     def test_and_list(self):
         a_list = [1, 'Georg', 'Haus', True]
         correct_str = '1, Georg, Haus and True'