bbstrader 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbstrader might be problematic. Click here for more details.

bbstrader/models/nlp.py CHANGED
@@ -1,14 +1,17 @@
1
+ import contextlib
2
+ import os
1
3
  import re
2
4
  import time
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
3
6
  from datetime import datetime
4
- from typing import Dict
7
+ from typing import Dict, List, Tuple
5
8
 
6
9
  import dash
10
+ import en_core_web_sm
7
11
  import matplotlib.pyplot as plt
8
12
  import nltk
9
13
  import pandas as pd
10
14
  import plotly.express as px
11
- import spacy
12
15
  from dash import dcc, html
13
16
  from dash.dependencies import Input, Output
14
17
  from nltk.corpus import stopwords
@@ -335,13 +338,12 @@ class TopicModeler(object):
335
338
  nltk.download("stopwords", quiet=True)
336
339
 
337
340
  try:
338
- self.nlp = spacy.load("en_core_web_sm")
341
+ self.nlp = en_core_web_sm.load()
339
342
  self.nlp.disable_pipes("ner")
340
343
  except OSError:
341
- raise RuntimeError(
342
- "The SpaCy model 'en_core_web_sm' is not installed.\n"
343
- "Please install it by running:\n"
344
- " python -m spacy download en_core_web_sm"
344
+ raise OSError(
345
+ "SpaCy model 'en_core_web_sm' not found. "
346
+ "Please install it using 'python -m spacy download en_core_web_sm'."
345
347
  )
346
348
 
347
349
  def preprocess_texts(self, texts: list[str]):
@@ -392,22 +394,25 @@ class SentimentAnalyzer(object):
392
394
  - Downloads NLTK tokenization (`punkt`) and stopwords.
393
395
  - Loads the `en_core_web_sm` SpaCy model with Named Entity Recognition (NER) disabled.
394
396
  - Initializes VADER's SentimentIntensityAnalyzer for sentiment scoring.
397
+
398
+ Args:
399
+ use_spacy (bool): If True, uses SpaCy for lemmatization. Defaults to False.
395
400
  """
396
401
  nltk.download("punkt", quiet=True)
397
402
  nltk.download("stopwords", quiet=True)
398
403
 
404
+ self.analyzer = SentimentIntensityAnalyzer()
405
+ self._stopwords = set(stopwords.words("english"))
406
+
399
407
  try:
400
- self.nlp = spacy.load("en_core_web_sm")
408
+ self.nlp = en_core_web_sm.load()
401
409
  self.nlp.disable_pipes("ner")
402
410
  except OSError:
403
- raise RuntimeError(
404
- "The SpaCy model 'en_core_web_sm' is not installed.\n"
405
- "Please install it by running:\n"
406
- " python -m spacy download en_core_web_sm"
411
+ raise OSError(
412
+ "SpaCy model 'en_core_web_sm' not found. "
413
+ "Please install it using 'python -m spacy download en_core_web_sm'."
407
414
  )
408
-
409
- self.analyzer = SentimentIntensityAnalyzer()
410
- self._stopwords = set(stopwords.words("english"))
415
+ self.news = FinancialNews()
411
416
 
412
417
  def preprocess_text(self, text: str):
413
418
  """
@@ -425,13 +430,20 @@ class SentimentAnalyzer(object):
425
430
  Returns:
426
431
  str: The cleaned and lemmatized text.
427
432
  """
433
+ if not isinstance(text, str):
434
+ raise ValueError(
435
+ f"{self.__class__.__name__}: preprocess_text expects a string, got {type(text)}"
436
+ )
428
437
  text = text.lower()
429
438
  text = re.sub(r"http\S+", "", text)
430
439
  text = re.sub(r"[^a-zA-Z\s]", "", text)
440
+
431
441
  words = word_tokenize(text)
432
442
  words = [word for word in words if word not in self._stopwords]
443
+
433
444
  doc = self.nlp(" ".join(words))
434
445
  words = [t.lemma_ for t in doc if t.lemma_ != "-PRON-"]
446
+
435
447
  return " ".join(words)
436
448
 
437
449
  def analyze_sentiment(self, texts, lexicon=None, textblob=False) -> float:
@@ -469,8 +481,96 @@ class SentimentAnalyzer(object):
469
481
  )
470
482
  return avg_sentiment
471
483
 
484
+ def _get_sentiment_for_one_ticker(
485
+ self,
486
+ ticker: str,
487
+ asset_type: str,
488
+ lexicon=None,
489
+ top_news=10,
490
+ **kwargs,
491
+ ) -> float:
492
+ rd_params = {"client_id", "client_secret", "user_agent"}
493
+ fm_params = {"start", "end", "page", "limit"}
494
+
495
+ # 1. Collect data from all sources
496
+ yahoo_news = self.news.get_yahoo_finance_news(
497
+ ticker, asset_type=asset_type, n_news=top_news
498
+ )
499
+ google_news = self.news.get_google_finance_news(
500
+ ticker, asset_type=asset_type, n_news=top_news
501
+ )
502
+
503
+ reddit_posts = []
504
+ if all(kwargs.get(rd) for rd in rd_params):
505
+ reddit_posts = self.news.get_reddit_posts(
506
+ ticker,
507
+ n_posts=top_news,
508
+ **{k: kwargs.get(k) for k in rd_params},
509
+ )
510
+
511
+ coindesk_news = self.news.get_coindesk_news(query=ticker, list_of_str=True)
512
+
513
+ fmp_source_news = []
514
+ if kwargs.get("fmp_api"):
515
+ fmp_news_client = self.news.get_fmp_news(kwargs.get("fmp_api"))
516
+ for src in ["articles"]:
517
+ try:
518
+ source_news = fmp_news_client.get_news(
519
+ ticker,
520
+ source=src,
521
+ symbol=ticker,
522
+ **{k: kwargs.get(k) for k in fm_params},
523
+ )
524
+ fmp_source_news.extend(source_news)
525
+ except Exception:
526
+ continue
527
+
528
+ # 2. Analyze sentiment for each source
529
+ news_sentiment = self.analyze_sentiment(
530
+ yahoo_news + google_news, lexicon=lexicon
531
+ )
532
+ reddit_sentiment = self.analyze_sentiment(
533
+ reddit_posts, lexicon=lexicon, textblob=True
534
+ )
535
+ fmp_sentiment = self.analyze_sentiment(
536
+ fmp_source_news, lexicon=lexicon, textblob=True
537
+ )
538
+ coindesk_sentiment = self.analyze_sentiment(
539
+ coindesk_news, lexicon=lexicon, textblob=True
540
+ )
541
+
542
+ # 3. Compute weighted average sentiment score
543
+ sentiments = [
544
+ news_sentiment,
545
+ reddit_sentiment,
546
+ fmp_sentiment,
547
+ coindesk_sentiment,
548
+ ]
549
+ # Count how many sources provided data to get a proper average
550
+ num_sources = sum(
551
+ 1
552
+ for source_data in [
553
+ yahoo_news + google_news,
554
+ reddit_posts,
555
+ fmp_source_news,
556
+ coindesk_news,
557
+ ]
558
+ if source_data
559
+ )
560
+
561
+ if num_sources == 0:
562
+ return 0.0
563
+
564
+ overall_sentiment = sum(sentiments) / num_sources
565
+ return overall_sentiment
566
+
472
567
  def get_sentiment_for_tickers(
473
- self, tickers, lexicon=None, asset_type="stock", top_news=10, **kwargs
568
+ self,
569
+ tickers: List[str] | List[Tuple[str, str]],
570
+ lexicon=None,
571
+ asset_type="stock",
572
+ top_news=10,
573
+ **kwargs,
474
574
  ) -> Dict[str, float]:
475
575
  """
476
576
  Computes sentiment scores for a list of financial tickers based on news and social media data.
@@ -487,9 +587,18 @@ class SentimentAnalyzer(object):
487
587
  3. Computes an overall sentiment score using a weighted average approach.
488
588
 
489
589
  Args:
490
- tickers (list of str): A list of stock, forex, crypto, or other asset tickers.
590
+ tickers (List[str] | List[Tuple[str, str]]): A list of asset tickers to analyze
591
+ - if using tuples, the first element is the ticker and the second is the asset type.
592
+ - if using a single string, the asset type must be specified or the default is "stock".
491
593
  lexicon (dict, optional): A custom sentiment lexicon to update VADER's default lexicon.
492
- asset_type (str, optional): The type of asset (e.g., "stock", "forex", "crypto"). Defaults to "stock".
594
+ asset_type (str, optional): The type of asset, Defaults to "stock",
595
+ supported types include:
596
+ - "stock": Stock symbols (e.g., AAPL, MSFT)
597
+ - "etf": Exchange-traded funds (e.g., SPY, QQQ)
598
+ - "future": Futures contracts (e.g., CL=F for crude oil)
599
+ - "forex": Forex pairs (e.g., EURUSD=X, USDJPY=X)
600
+ - "crypto": Cryptocurrency pairs (e.g., BTC-USD, ETH-USD)
601
+ - "index": Stock market indices (e.g., ^GSPC for S&P 500)
493
602
  top_news (int, optional): Number of news articles/posts to fetch per source. Defaults to 10.
494
603
  **kwargs: Additional parameters for API authentication and data retrieval, including:
495
604
  - fmp_api (str): API key for Financial Modeling Prep.
@@ -500,63 +609,60 @@ class SentimentAnalyzer(object):
500
609
  - Positive values indicate positive sentiment.
501
610
  - Negative values indicate negative sentiment.
502
611
  - Zero indicates neutral sentiment.
612
+ Notes:
613
+ The tickers names must follow yahoo finance conventions.
503
614
  """
615
+
504
616
  sentiment_results = {}
505
- rd_params = ["client_id", "client_secret", "user_agent"]
506
- news = FinancialNews()
507
- for ticker in tickers:
508
- # Collect data
509
- sources = 0
510
- yahoo_news = news.get_yahoo_finance_news(
511
- ticker, asset_type=asset_type, n_news=top_news
512
- )
513
- google_news = news.get_google_finance_news(
514
- ticker, asset_type=asset_type, n_news=top_news
515
- )
516
- reddit_posts = news.get_reddit_posts(
517
- ticker, n_posts=top_news, **{k: kwargs.get(k) for k in rd_params}
518
- )
519
- coindesk_news = news.get_coindesk_news(query=ticker, list_of_str=True)
520
- fmp_source_news = []
521
- fmp_news = news.get_fmp_news(kwargs.get("fmp_api"))
522
- for source in ["articles"]: # , "releases", asset_type]:
523
- try:
524
- source_news = fmp_news.get_news(
525
- ticker, source=source, symbol=ticker, **kwargs
526
- )
527
- fmp_source_news += source_news
528
- except Exception:
529
- source_news = []
530
- if any([len(s) > 0 for s in [yahoo_news, google_news]]):
531
- sources += 1
532
- for source in [reddit_posts, fmp_source_news, coindesk_news]:
533
- if len(source) > 0:
534
- sources += 1
535
- # Compute sentiment
536
- news_sentiment = self.analyze_sentiment(
537
- yahoo_news + google_news, lexicon=lexicon
538
- )
539
- reddit_sentiment = self.analyze_sentiment(
540
- reddit_posts, lexicon=lexicon, textblob=True
541
- )
542
- fmp_sentiment = self.analyze_sentiment(
543
- fmp_source_news, lexicon=lexicon, textblob=True
544
- )
545
- coindesk_sentiment = self.analyze_sentiment(
546
- coindesk_news, lexicon=lexicon, textblob=True
547
- )
548
617
 
549
- # Weighted average sentiment score
550
- if sources != 0:
551
- overall_sentiment = (
552
- news_sentiment
553
- + reddit_sentiment
554
- + fmp_sentiment
555
- + coindesk_sentiment
556
- ) / sources
557
- else:
558
- overall_sentiment = 0.0
559
- sentiment_results[ticker] = overall_sentiment
618
+ # Suppress stdout/stderr from underlying libraries during execution
619
+ with open(os.devnull, "w") as devnull:
620
+ with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(
621
+ devnull
622
+ ):
623
+ with ThreadPoolExecutor() as executor:
624
+ # Map each future to its ticker for easy result lookup
625
+ future_to_ticker = {}
626
+ for ticker_info in tickers:
627
+ # Normalize input to (ticker, asset_type)
628
+ if isinstance(ticker_info, tuple):
629
+ ticker_symbol, ticker_asset_type = ticker_info
630
+ else:
631
+ ticker_symbol, ticker_asset_type = ticker_info, asset_type
632
+
633
+ if ticker_asset_type not in [
634
+ "stock",
635
+ "etf",
636
+ "future",
637
+ "forex",
638
+ "crypto",
639
+ "index",
640
+ ]:
641
+ raise ValueError(
642
+ f"Unsupported asset type '{ticker_asset_type}' for {ticker_symbol}."
643
+ )
644
+
645
+ # Submit the job to the thread pool
646
+ future = executor.submit(
647
+ self._get_sentiment_for_one_ticker,
648
+ ticker=ticker_symbol,
649
+ asset_type=ticker_asset_type,
650
+ lexicon=lexicon,
651
+ top_news=top_news,
652
+ **kwargs,
653
+ )
654
+ future_to_ticker[future] = ticker_symbol
655
+
656
+ # Collect results as they are completed
657
+ for future in as_completed(future_to_ticker):
658
+ ticker_symbol = future_to_ticker[future]
659
+ try:
660
+ sentiment_score = future.result()
661
+ sentiment_results[ticker_symbol] = sentiment_score
662
+ except Exception:
663
+ sentiment_results[ticker_symbol] = (
664
+ 0.0 # Assign a neutral score on error
665
+ )
560
666
 
561
667
  return sentiment_results
562
668
 
@@ -651,8 +757,10 @@ class SentimentAnalyzer(object):
651
757
  bar and scatter plots. It fetches new sentiment data at specified intervals.
652
758
 
653
759
  Args:
654
- tickers (list[str]):
655
- A list of asset tickers (e.g., ["AAPL", "GOOGL", "TSLA"]).
760
+ tickers (List[str] | List[Tuple[str, str]]):
761
+ A list of financial asset tickers to analyze.
762
+ - If using tuples, the first element is the ticker and the second is the asset type.
763
+ - If using a single string, the asset type must be specified or defaults to "stock".
656
764
  asset_type (str, optional):
657
765
  The type of financial asset ("stock", "forex", "crypto"). Defaults to "stock".
658
766
  lexicon (dict, optional):
@@ -153,7 +153,7 @@ def optimized_weights(prices=None, returns=None, rfr=0.0, freq=252, method="equa
153
153
  freq : int, optional
154
154
  Number of days for calculating portfolio weights, such as 252 for a year's worth of daily returns (default is 252).
155
155
  method : str, optional
156
- Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'markowitz').
156
+ Optimization method to use ('markowitz', 'hrp', or 'equal') (default is 'equal').
157
157
 
158
158
  Returns
159
159
  -------