opsci-toolbox 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff reflects the changes between publicly released versions of the package as they appear in its public registry, and is provided for informational purposes only.
- opsci_toolbox/apis/rapidapi_helpers.py +82 -0
- opsci_toolbox/helpers/common.py +566 -191
- opsci_toolbox/helpers/cv.py +298 -123
- opsci_toolbox/helpers/dataviz.py +1005 -216
- opsci_toolbox/helpers/dates.py +55 -8
- opsci_toolbox/helpers/nlp.py +768 -110
- opsci_toolbox/helpers/nlp_cuml.py +280 -0
- opsci_toolbox/helpers/sna.py +101 -10
- opsci_toolbox/helpers/surreaction.py +156 -0
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/METADATA +9 -11
- opsci_toolbox-0.0.6.dist-info/RECORD +21 -0
- opsci_toolbox-0.0.2.dist-info/RECORD +0 -19
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/WHEEL +0 -0
- {opsci_toolbox-0.0.2.dist-info → opsci_toolbox-0.0.6.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ from tqdm import tqdm
 import re
 from datetime import datetime,timedelta
 from opsci_toolbox.helpers.dates import str_to_datetime
+from opsci_toolbox.helpers.common import write_jsonl
 
 def create_queries_per_period(query, publishedAfter, publishedBefore, col_publishedAfter = "start_date", col_publishedBefore = "end_date", date_format = '%Y-%m-%d', rolling_days = 7 ):
     datetime_publishedAfter = datetime.strptime(publishedAfter, date_format)
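The only change in this first hunk is the new import of write_jsonl, which the Twitter list helpers added further down use to persist raw API responses. As a rough sketch of the contract implied by the call write_jsonl(results, path_json, filename) later in this diff, a stand-in could look like the following; the name write_jsonl_sketch, the .jsonl extension and the return value are assumptions, not the actual implementation in opsci_toolbox.helpers.common.

# Hypothetical stand-in for opsci_toolbox.helpers.common.write_jsonl, inferred only
# from the call write_jsonl(results, path_json, filename) later in this diff.
# The real helper may differ (extension handling, encoding, return value).
import json
import os

def write_jsonl_sketch(records: list, path_json: str, filename: str) -> str:
    """Write a list of dicts to <path_json>/<filename>.jsonl, one JSON object per line."""
    os.makedirs(path_json, exist_ok=True)
    file_path = os.path.join(path_json, filename + ".jsonl")
    with open(file_path, "w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    return file_path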
@@ -278,6 +279,31 @@ def parse_tweet(json_data):
     df = pd.DataFrame.from_records(all_records, columns = all_cols)
     return df
 
+def parse_twitter_list_details(json_data):
+    """
+    Parse list results from https://rapidapi.com/omarmhaimdat/api/twitter154
+    """
+    list_id = json_data.get("list_id", "")
+    list_id_str = json_data.get("list_id_str", "")
+    member_count = json_data.get("member_count", 0)
+    name = json_data.get("name", "")
+    suscriber_count = json_data.get("subscriber_count", 0)
+    creation_date = json_data.get("creation_date", 0)
+    mode = json_data.get("mode", "0")
+
+    user_record = parse_user(json_data.get("user", {}))
+    record = (list_id, list_id_str, member_count, name, suscriber_count, creation_date, mode) + user_record
+    cols = ["list_id", "list_id_str", "member_count", "name", "suscriber_count", "creation_date", "mode", "user_creation_date", "user_id", "user_username", "user_name", "user_follower_count", "user_following_count", "user_favourites_count", "user_is_private", "user_is_verified", "user_is_blue_verified", "user_location", "user_profile_pic_url", "user_profile_banner_url", "user_description", "user_external_url", "user_number_of_tweets", "user_bot", "user_timestamp", "user_has_nft_avatar", "user_category", "user_default_profile", "user_default_profile_image", "user_listed_count", "user_verified_type"]
+
+    df = pd.DataFrame.from_records(record, cols)
+    return df
+
+######################################################################################
+# function to parse Instagram data
+# https://rapidapi.com/JoTucker/api/instagram-scraper2
+# https://instagram-scraper2.p.rapidapi.com/hash_tag_medias_v2
+######################################################################################
+
 def instagram_parse_hashtag_data(hashtag_data):
     hashtag_id = hashtag_data.get("id")
     hashtag_name = hashtag_data.get("name")
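The new parse_twitter_list_details helper flattens a single list-details payload from the twitter154 RapidAPI endpoint into one record, reusing the module's parse_user for the owner's profile fields. Below is a hypothetical usage sketch on a response saved to disk; the file name is illustrative. As a caution, pd.DataFrame.from_records(record, cols) passes cols as the positional index argument of from_records rather than as columns, so callers may want to inspect the resulting frame's shape.

# Hypothetical usage of parse_twitter_list_details on a saved twitter154 response.
# The file name list_details_response.json is an assumption for illustration only.
import json
from opsci_toolbox.apis.rapidapi_helpers import parse_twitter_list_details

with open("list_details_response.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)  # raw payload from the twitter154 list-details endpoint

df_list = parse_twitter_list_details(json_data)
print(df_list)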
@@ -324,3 +350,59 @@ def instagram_parse_hashtag_data(hashtag_data):
     return df
 
 
+######################################################################################
+# function to parse Twitter data
+# https://rapidapi.com/twttrapi-twttrapi-default/api/twttrapi
+######################################################################################
+def compile_list_entries(json_data, path_json, filename):
+    """
+    Function to return next cursor and list details from https://twttrapi.p.rapidapi.com/list-members
+    """
+    results = []
+    next_cursor = None
+    entries = json_data.get('data', {}).get('list', {}).get('timeline_response', {}).get("timeline", {}).get("instructions", [{}])[-1].get('entries',[])
+    if len(entries)>0:
+        for entry in entries:
+            content = entry.get("content")
+            if (content.get("__typename") == "TimelineTimelineCursor") & (content.get("cursorType") =="Bottom"):
+                next_cursor = content.get("value", None)
+                if next_cursor:
+                    if next_cursor.split('|')[0]=="0":
+                        next_cursor = None
+            if content.get("__typename") != "TimelineTimelineCursor":
+                legacy = content.get("content", {}). get('userResult', {}).get("result", {}).get("legacy", {})
+                results.append(legacy)
+
+    write_jsonl(results, path_json, filename)
+    return results, next_cursor
+
+
+def parse_list_entries(jsonl_data):
+    """
+    Function to parse list details from https://twttrapi.p.rapidapi.com/list-members
+    """
+    all_records=[]
+    for data in jsonl_data:
+        id_str = data.get("id_str","")
+        name = data.get("name","")
+        screen_name = data.get("screen_name", "")
+        created_at = data.get("created_at")
+        description = data.get("description")
+        statuses_count = data.get("statuses_count", 0)
+        followers_count = data.get("followers_count",0)
+        friends_count = data.get("friends_count",0)
+        favourites_count = data.get("favourites_count",0)
+        media_count = data.get("media_count",0)
+        protected = data.get("protected", False)
+        verified = data.get("verified", False)
+        verified_type = data.get("verified_type", "")
+        entities = data.get("entities")
+        urls = [url.get("expanded_url","") for url in entities.get('url', {}).get("urls",[])]
+        user_mentions = [um.get("screen_name","") for um in entities.get('description', {}).get('user_mentions', [])]
+        user_mentions_indices = [um.get("indices",[]) for um in entities.get('description', {}).get('user_mentions', [])]
+        hashtags = [um.get("text","") for um in entities.get('description', {}).get('hashtags', [])]
+        hashtags_indices = [um.get("indices",[]) for um in entities.get('description', {}).get('hashtags', [])]
+        record = (id_str, name, screen_name, created_at, description, statuses_count, followers_count, friends_count, favourites_count, media_count, protected, verified, verified_type, urls, user_mentions, user_mentions_indices, hashtags, hashtags_indices)
+        all_records.append(record)
+    df = pd.DataFrame.from_records(all_records, columns = ["id_str", "name", "screen_name", "created_at", "description", "statuses_count", "followers_count", "friends_count", "favourites_count", "media_count", "protected", "verified", "verified_type", "urls", "user_mentions", "user_mentions_indices", "hashtags", "hashtags_indices"])
+    return df
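In this hunk, compile_list_entries walks one page of the twttrapi list-members timeline, collects the legacy user objects, clears the Bottom cursor once its first "|"-separated segment is "0", and dumps the page to JSONL via write_jsonl; parse_list_entries then flattens those user objects into a DataFrame. A hypothetical pagination loop chaining the two could look like the sketch below; the request URL, header names, parameter names, output directory and the RAPIDAPI_KEY / LIST_ID placeholders are illustrative and should be checked against the twttrapi documentation.

# Hypothetical pagination loop combining compile_list_entries and parse_list_entries.
# Endpoint URL, headers, parameter names and placeholders are assumptions for illustration.
import requests
from opsci_toolbox.apis.rapidapi_helpers import compile_list_entries, parse_list_entries

url = "https://twttrapi.p.rapidapi.com/list-members"
headers = {
    "X-RapidAPI-Key": "RAPIDAPI_KEY",              # replace with a real key
    "X-RapidAPI-Host": "twttrapi.p.rapidapi.com",
}

all_members, cursor, page = [], None, 0
while True:
    params = {"list_id": "LIST_ID"}                # placeholder list id
    if cursor:
        params["cursor"] = cursor
    payload = requests.get(url, headers=headers, params=params).json()
    # Each page is also written to disk as JSONL by compile_list_entries.
    results, cursor = compile_list_entries(payload, "data/list_members", f"page_{page}")
    all_members.extend(results)
    page += 1
    if cursor is None:                             # no usable Bottom cursor left to follow
        break

df_members = parse_list_entries(all_members)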