bbstrader 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbstrader might be problematic. Click here for more details.

bbstrader/models/nlp.py CHANGED
@@ -2,15 +2,16 @@ import contextlib
2
2
  import os
3
3
  import re
4
4
  import time
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
6
  from datetime import datetime
6
7
  from typing import Dict, List, Tuple
7
8
 
8
9
  import dash
10
+ import en_core_web_sm
9
11
  import matplotlib.pyplot as plt
10
12
  import nltk
11
13
  import pandas as pd
12
14
  import plotly.express as px
13
- import en_core_web_sm
14
15
  from dash import dcc, html
15
16
  from dash.dependencies import Input, Output
16
17
  from nltk.corpus import stopwords
@@ -20,7 +21,6 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
20
21
 
21
22
  from bbstrader.core.data import FinancialNews
22
23
 
23
-
24
24
  __all__ = [
25
25
  "TopicModeler",
26
26
  "SentimentAnalyzer",
@@ -345,7 +345,7 @@ class TopicModeler(object):
345
345
  "SpaCy model 'en_core_web_sm' not found. "
346
346
  "Please install it using 'python -m spacy download en_core_web_sm'."
347
347
  )
348
-
348
+
349
349
  def preprocess_texts(self, texts: list[str]):
350
350
  def clean_doc(Doc):
351
351
  doc = []
@@ -431,7 +431,9 @@ class SentimentAnalyzer(object):
431
431
  str: The cleaned and lemmatized text.
432
432
  """
433
433
  if not isinstance(text, str):
434
- raise ValueError(f"{self.__class__.__name__}: preprocess_text expects a string, got {type(text)}")
434
+ raise ValueError(
435
+ f"{self.__class__.__name__}: preprocess_text expects a string, got {type(text)}"
436
+ )
435
437
  text = text.lower()
436
438
  text = re.sub(r"http\S+", "", text)
437
439
  text = re.sub(r"[^a-zA-Z\s]", "", text)
@@ -479,8 +481,96 @@ class SentimentAnalyzer(object):
479
481
  )
480
482
  return avg_sentiment
481
483
 
484
+ def _get_sentiment_for_one_ticker(
485
+ self,
486
+ ticker: str,
487
+ asset_type: str,
488
+ lexicon=None,
489
+ top_news=10,
490
+ **kwargs,
491
+ ) -> float:
492
+ rd_params = {"client_id", "client_secret", "user_agent"}
493
+ fm_params = {"start", "end", "page", "limit"}
494
+
495
+ # 1. Collect data from all sources
496
+ yahoo_news = self.news.get_yahoo_finance_news(
497
+ ticker, asset_type=asset_type, n_news=top_news
498
+ )
499
+ google_news = self.news.get_google_finance_news(
500
+ ticker, asset_type=asset_type, n_news=top_news
501
+ )
502
+
503
+ reddit_posts = []
504
+ if all(kwargs.get(rd) for rd in rd_params):
505
+ reddit_posts = self.news.get_reddit_posts(
506
+ ticker,
507
+ n_posts=top_news,
508
+ **{k: kwargs.get(k) for k in rd_params},
509
+ )
510
+
511
+ coindesk_news = self.news.get_coindesk_news(query=ticker, list_of_str=True)
512
+
513
+ fmp_source_news = []
514
+ if kwargs.get("fmp_api"):
515
+ fmp_news_client = self.news.get_fmp_news(kwargs.get("fmp_api"))
516
+ for src in ["articles"]:
517
+ try:
518
+ source_news = fmp_news_client.get_news(
519
+ ticker,
520
+ source=src,
521
+ symbol=ticker,
522
+ **{k: kwargs.get(k) for k in fm_params},
523
+ )
524
+ fmp_source_news.extend(source_news)
525
+ except Exception:
526
+ continue
527
+
528
+ # 2. Analyze sentiment for each source
529
+ news_sentiment = self.analyze_sentiment(
530
+ yahoo_news + google_news, lexicon=lexicon
531
+ )
532
+ reddit_sentiment = self.analyze_sentiment(
533
+ reddit_posts, lexicon=lexicon, textblob=True
534
+ )
535
+ fmp_sentiment = self.analyze_sentiment(
536
+ fmp_source_news, lexicon=lexicon, textblob=True
537
+ )
538
+ coindesk_sentiment = self.analyze_sentiment(
539
+ coindesk_news, lexicon=lexicon, textblob=True
540
+ )
541
+
542
+ # 3. Compute weighted average sentiment score
543
+ sentiments = [
544
+ news_sentiment,
545
+ reddit_sentiment,
546
+ fmp_sentiment,
547
+ coindesk_sentiment,
548
+ ]
549
+ # Count how many sources provided data to get a proper average
550
+ num_sources = sum(
551
+ 1
552
+ for source_data in [
553
+ yahoo_news + google_news,
554
+ reddit_posts,
555
+ fmp_source_news,
556
+ coindesk_news,
557
+ ]
558
+ if source_data
559
+ )
560
+
561
+ if num_sources == 0:
562
+ return 0.0
563
+
564
+ overall_sentiment = sum(sentiments) / num_sources
565
+ return overall_sentiment
566
+
482
567
  def get_sentiment_for_tickers(
483
- self, tickers: List[str] | List[Tuple[str, str]], lexicon=None, asset_type="stock", top_news=10, **kwargs
568
+ self,
569
+ tickers: List[str] | List[Tuple[str, str]],
570
+ lexicon=None,
571
+ asset_type="stock",
572
+ top_news=10,
573
+ **kwargs,
484
574
  ) -> Dict[str, float]:
485
575
  """
486
576
  Computes sentiment scores for a list of financial tickers based on news and social media data.
@@ -501,7 +591,7 @@ class SentimentAnalyzer(object):
501
591
  - if using tuples, the first element is the ticker and the second is the asset type.
502
592
  - if using a single string, the asset type must be specified or the default is "stock".
503
593
  lexicon (dict, optional): A custom sentiment lexicon to update VADER's default lexicon.
504
- asset_type (str, optional): The type of asset, Defaults to "stock",
594
+ asset_type (str, optional): The type of asset, Defaults to "stock",
505
595
  supported types include:
506
596
  - "stock": Stock symbols (e.g., AAPL, MSFT)
507
597
  - "etf": Exchange-traded funds (e.g., SPY, QQQ)
@@ -522,82 +612,57 @@ class SentimentAnalyzer(object):
522
612
  Notes:
523
613
  The tickers names must follow yahoo finance conventions.
524
614
  """
615
+
525
616
  sentiment_results = {}
526
- rd_params = {"client_id", "client_secret", "user_agent"}
527
- fm_params = {"start", "end", "page", "limit"}
528
- with open(os.devnull, 'w') as devnull:
529
- with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
530
- for asset in tickers:
531
- if isinstance(asset, tuple):
532
- ticker, asset_type = asset
533
- if asset_type not in [
534
- "stock",
535
- "etf",
536
- "future",
537
- "forex",
538
- "crypto",
539
- "index",
540
- ]:
541
- raise ValueError(
542
- f"Unsupported asset type '{asset_type}'. "
543
- "Supported types: stock, etf, future, forex, crypto, index."
544
- )
545
- # Collect data
546
- sources = 0
547
- yahoo_news = self.news.get_yahoo_finance_news(
548
- ticker, asset_type=asset_type, n_news=top_news
549
- )
550
- google_news = self.news.get_google_finance_news(
551
- ticker, asset_type=asset_type, n_news=top_news
552
- )
553
- reddit_posts = []
554
- if all(kwargs.get(rd) for rd in rd_params):
555
- reddit_posts = self.news.get_reddit_posts(
556
- ticker, n_posts=top_news, **{k: kwargs.get(k) for k in rd_params}
557
- )
558
- coindesk_news = self.news.get_coindesk_news(query=ticker, list_of_str=True)
559
- fmp_source_news = []
560
- if kwargs.get("fmp_api"):
561
- fmp_news = self.news.get_fmp_news(kwargs.get("fmp_api"))
562
- for src in ["articles"]: # , "releases", asset_type]:
563
- try:
564
- source_news = fmp_news.get_news(
565
- ticker, source=src, symbol=ticker, **{k: kwargs.get(k) for k in fm_params}
566
- )
567
- fmp_source_news += source_news
568
- except Exception:
569
- continue
570
- if any([len(s) > 0 for s in [yahoo_news, google_news]]):
571
- sources += 1
572
- for source in [reddit_posts, fmp_source_news, coindesk_news]:
573
- if len(source) > 0:
574
- sources += 1
575
- # Compute sentiment
576
- news_sentiment = self.analyze_sentiment(
577
- yahoo_news + google_news, lexicon=lexicon
578
- )
579
- reddit_sentiment = self.analyze_sentiment(
580
- reddit_posts, lexicon=lexicon, textblob=True
581
- )
582
- fmp_sentiment = self.analyze_sentiment(
583
- fmp_source_news, lexicon=lexicon, textblob=True
584
- )
585
- coindesk_sentiment = self.analyze_sentiment(
586
- coindesk_news, lexicon=lexicon, textblob=True
587
- )
588
617
 
589
- # Weighted average sentiment score
590
- if sources != 0:
591
- overall_sentiment = (
592
- news_sentiment
593
- + reddit_sentiment
594
- + fmp_sentiment
595
- + coindesk_sentiment
596
- ) / sources
597
- else:
598
- overall_sentiment = 0.0
599
- sentiment_results[ticker] = overall_sentiment
600
- time.sleep(1) # To avoid hitting API rate limits
618
+ # Suppress stdout/stderr from underlying libraries during execution
619
+ with open(os.devnull, "w") as devnull:
620
+ with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(
621
+ devnull
622
+ ):
623
+ with ThreadPoolExecutor() as executor:
624
+ # Map each future to its ticker for easy result lookup
625
+ future_to_ticker = {}
626
+ for ticker_info in tickers:
627
+ # Normalize input to (ticker, asset_type)
628
+ if isinstance(ticker_info, tuple):
629
+ ticker_symbol, ticker_asset_type = ticker_info
630
+ else:
631
+ ticker_symbol, ticker_asset_type = ticker_info, asset_type
632
+
633
+ if ticker_asset_type not in [
634
+ "stock",
635
+ "etf",
636
+ "future",
637
+ "forex",
638
+ "crypto",
639
+ "index",
640
+ ]:
641
+ raise ValueError(
642
+ f"Unsupported asset type '{ticker_asset_type}' for {ticker_symbol}."
643
+ )
644
+
645
+ # Submit the job to the thread pool
646
+ future = executor.submit(
647
+ self._get_sentiment_for_one_ticker,
648
+ ticker=ticker_symbol,
649
+ asset_type=ticker_asset_type,
650
+ lexicon=lexicon,
651
+ top_news=top_news,
652
+ **kwargs,
653
+ )
654
+ future_to_ticker[future] = ticker_symbol
655
+
656
+ # Collect results as they are completed
657
+ for future in as_completed(future_to_ticker):
658
+ ticker_symbol = future_to_ticker[future]
659
+ try:
660
+ sentiment_score = future.result()
661
+ sentiment_results[ticker_symbol] = sentiment_score
662
+ except Exception:
663
+ sentiment_results[ticker_symbol] = (
664
+ 0.0 # Assign a neutral score on error
665
+ )
601
666
 
602
667
  return sentiment_results
603
668
 
@@ -153,7 +153,7 @@ def optimized_weights(prices=None, returns=None, rfr=0.0, freq=252, method="equa
153
153
  freq : int, optional
154
154
  Number of days for calculating portfolio weights, such as 252 for a year's worth of daily returns (default is 252).
155
155
  method : str, optional
156
- Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'markowitz').
156
+ Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'equal').
157
157
 
158
158
  Returns
159
159
  -------